<a href="https://colab.research.google.com/github/pradhansankalp10/Predictive-Model/blob/main/Weather.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import zipfile
import os

dataset_file_path = '/content/weather.zip'

destination_folder = 'Sankalp'

if not os.path.exists(destination_folder):
    os.makedirs(destination_folder)

with zipfile.ZipFile(dataset_file_path, 'r') as zip_ref:
    zip_ref.extractall(destination_folder)

print(f"Dataset unzipped to: {destination_folder}")

Dataset unzipped to: Sankalp


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier


In [3]:
data = pd.read_csv('/content/Sankalp/weather_classification_data.csv')
print(data.head())

   Temperature  Humidity  Wind Speed  Precipitation (%)    Cloud Cover  \
0         14.0        73         9.5               82.0  partly cloudy   
1         39.0        96         8.5               71.0  partly cloudy   
2         30.0        64         7.0               16.0          clear   
3         38.0        83         1.5               82.0          clear   
4         27.0        74        17.0               66.0       overcast   

   Atmospheric Pressure  UV Index  Season  Visibility (km)  Location  \
0               1010.82         2  Winter              3.5    inland   
1               1011.43         7  Spring             10.0    inland   
2               1018.72         5  Spring              5.5  mountain   
3               1026.25         7  Spring              1.0   coastal   
4                990.67         1  Winter              2.5  mountain   

  Weather Type  
0        Rainy  
1       Cloudy  
2        Sunny  
3        Sunny  
4        Rainy  


In [4]:
# Check for missing values
print(data.isnull().sum())


Temperature             0
Humidity                0
Wind Speed              0
Precipitation (%)       0
Cloud Cover             0
Atmospheric Pressure    0
UV Index                0
Season                  0
Visibility (km)         0
Location                0
Weather Type            0
dtype: int64


In [5]:
# Drop rows with missing values
data = data.dropna()

In [7]:
print(data.columns)

Index(['Temperature', 'Humidity', 'Wind Speed', 'Precipitation (%)',
       'Cloud Cover', 'Atmospheric Pressure', 'UV Index', 'Season',
       'Visibility (km)', 'Location', 'Weather Type'],
      dtype='object')


In [8]:
# Encode the target variable
label_encoder = LabelEncoder()
data['Weather Type'] = label_encoder.fit_transform(data['Weather Type'])


In [57]:
X = data.drop('Weather Type', axis=1)

In [59]:
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

In [61]:
import os
import shutil
test_folder_name = "TEST 1"
if not os.path.exists(test_folder_name):
    os.makedirs(test_folder_name)

code_file_path = "/content/Sankalp/weather_classification_data.csv"

shutil.copy(code_file_path, os.path.join(test_folder_name, "your_code_file.py"))



'TEST 1/your_code_file.py'

In [62]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

data = pd.read_csv('/content/Sankalp/weather_classification_data.csv')
label_encoder = LabelEncoder()
data['Weather Type'] = label_encoder.fit_transform(data['Weather Type'])
X = data.drop(columns=['Weather Type'])
y = data['Weather Type']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
categorical_features = X_train.select_dtypes(include=['object']).columns
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_data_train = encoder.fit_transform(X_train[categorical_features])
encoded_data_test = encoder.transform(X_test[categorical_features])
encoded_feature_names = encoder.get_feature_names_out(categorical_features)
encoded_df_train = pd.DataFrame(encoded_data_train, columns=encoded_feature_names, index=X_train.index)
encoded_df_test = pd.DataFrame(encoded_data_test, columns=encoded_feature_names, index=X_test.index)
X_train = X_train.drop(columns=categorical_features).join(encoded_df_train)
X_test = X_test.drop(columns=categorical_features).join(encoded_df_test)
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)

y_pred_dt = dt_classifier.predict(X_test)

accuracy_dt = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Accuracy:", accuracy_dt)
print("Classification Report for Decision Tree:\n", classification_report(y_test, y_pred_dt))
print("Confusion Matrix for Decision Tree:\n", confusion_matrix(y_test, y_pred_dt))

Decision Tree Accuracy: 0.9027777777777778
Classification Report for Decision Tree:
               precision    recall  f1-score   support

           0       0.88      0.88      0.88       955
           1       0.89      0.89      0.89       982
           2       0.93      0.94      0.93      1033
           3       0.91      0.90      0.90       990

    accuracy                           0.90      3960
   macro avg       0.90      0.90      0.90      3960
weighted avg       0.90      0.90      0.90      3960

Confusion Matrix for Decision Tree:
 [[843  51  29  32]
 [ 57 874  18  33]
 [ 20  22 968  23]
 [ 38  33  29 890]]


In [63]:
import pickle

# Save the trained model to a file
filename = 'decision_tree_model.sav'
pickle.dump(dt_classifier, open(filename, 'wb'))
from google.colab import files
files.download('decision_tree_model.sav')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [65]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

test_data = pd.read_csv('/content/TEST 1/your_code_file.py')

label_encoder = LabelEncoder()
test_data['Weather Type'] = label_encoder.fit_transform(test_data['Weather Type'])
X_test_new = test_data.drop(columns=['Weather Type'])
y_test_new = test_data['Weather Type']
categorical_features = X_test_new.select_dtypes(include=['object']).columns
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_data_test_new = encoder.fit_transform(X_test_new[categorical_features])
encoded_feature_names = encoder.get_feature_names_out(categorical_features)
encoded_df_test_new = pd.DataFrame(encoded_data_test_new, columns=encoded_feature_names, index=X_test_new.index)
X_test_new = X_test_new.drop(columns=categorical_features).join(encoded_df_test_new)

In [66]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
y_pred_new = loaded_model.predict(X_test_new)
accuracy = accuracy_score(y_test_new, y_pred_new)
print(f"Accuracy on TEST 1 data: {accuracy}")
print(classification_report(y_test_new, y_pred_new))
print(confusion_matrix(y_test_new, y_pred_new))

Accuracy on TEST 1 data: 0.9708333333333333
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      3300
           1       0.97      0.97      0.97      3300
           2       0.98      0.98      0.98      3300
           3       0.97      0.97      0.97      3300

    accuracy                           0.97     13200
   macro avg       0.97      0.97      0.97     13200
weighted avg       0.97      0.97      0.97     13200

[[3188   51   29   32]
 [  57 3192   18   33]
 [  20   22 3235   23]
 [  38   33   29 3200]]


In [68]:
import pickle
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

filename = 'decision_tree_model.sav'
loaded_model = pickle.load(open(filename, 'rb'))

test_data = pd.read_csv('/content/TEST 1/your_code_file.py')

label_encoder = LabelEncoder()
test_data['Weather Type'] = label_encoder.fit_transform(test_data['Weather Type'])
X_test_new = test_data.drop(columns=['Weather Type'])
y_test_new = test_data['Weather Type']

categorical_features = X_test_new.select_dtypes(include=['object']).columns
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_data_test_new = encoder.fit_transform(X_test_new[categorical_features])
encoded_feature_names = encoder.get_feature_names_out(categorical_features)
encoded_df_test_new = pd.DataFrame(encoded_data_test_new, columns=encoded_feature_names, index=X_test_new.index)
X_test_new = X_test_new.drop(columns=categorical_features).join(encoded_df_test_new)

predicted_weather_types = loaded_model.predict(X_test_new)

print("Predicted Weather Types (Numerical):", predicted_weather_types)

predicted_labels = label_encoder.inverse_transform(predicted_weather_types)
print("Predicted Weather Labels:", predicted_labels)

Predicted Weather Types (Numerical): [1 0 3 ... 0 2 1]
Predicted Weather Labels: ['Rainy' 'Cloudy' 'Sunny' ... 'Cloudy' 'Snowy' 'Rainy']


In [69]:
import pickle
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

print("Predicted Weather Labels:")
for label in predicted_labels:
    print(label)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Rainy
Rainy
Cloudy
Sunny
Cloudy
Snowy
Sunny
Cloudy
Rainy
Rainy
Cloudy
Snowy
Snowy
Snowy
Sunny
Sunny
Rainy
Rainy
Snowy
Sunny
Rainy
Snowy
Sunny
Sunny
Rainy
Snowy
Cloudy
Sunny
Snowy
Rainy
Sunny
Cloudy
Sunny
Snowy
Snowy
Rainy
Sunny
Cloudy
Rainy
Cloudy
Rainy
Snowy
Sunny
Rainy
Snowy
Snowy
Snowy
Snowy
Sunny
Snowy
Cloudy
Cloudy
Snowy
Cloudy
Sunny
Snowy
Snowy
Snowy
Snowy
Cloudy
Snowy
Cloudy
Snowy
Snowy
Cloudy
Sunny
Cloudy
Sunny
Sunny
Snowy
Snowy
Rainy
Sunny
Snowy
Rainy
Sunny
Sunny
Cloudy
Snowy
Sunny
Sunny
Sunny
Sunny
Snowy
Rainy
Rainy
Snowy
Rainy
Rainy
Rainy
Rainy
Sunny
Snowy
Snowy
Sunny
Sunny
Snowy
Cloudy
Snowy
Rainy
Sunny
Rainy
Sunny
Rainy
Cloudy
Rainy
Cloudy
Snowy
Rainy
Snowy
Sunny
Rainy
Cloudy
Snowy
Snowy
Sunny
Sunny
Sunny
Rainy
Cloudy
Snowy
Rainy
Snowy
Rainy
Rainy
Sunny
Cloudy
Sunny
Cloudy
Cloudy
Sunny
Snowy
Rainy
Snowy
Cloudy
Cloudy
Cloudy
Cloudy
Rainy
Rainy
Rainy
Cloudy
Sunny
Cloudy
Rainy
Sunny
Cloudy
Rainy
Snowy
Rainy
Clou