In [199]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder


In [200]:
df = pd.read_csv('modified_data.csv')
df.head(5)

Unnamed: 0,YEAR,MO,DY,ALLSKY_SFC_UV_INDEX,WS10M,T2M_MAX,T2M_MIN,PRECTOTCORR,QV2M,RH2M,PS,WD10M,ALLSKY_SFC_SW_DWN,CLRSKY_SFC_SW_DWN,WS2M,T2MDEW,PSC,WSC,Climate_Condition
0,2024,1,1,1.91,0.84,27.26,18.05,0.0,14.83,84.56,93.43,121.88,4.05,5.2,0.2,19.01,101.39,0.95,Humid
1,2024,1,2,1.76,1.04,26.0,19.01,1.66,15.2,85.81,93.37,78.62,4.5,4.79,0.23,19.39,101.32,1.16,Humid
2,2024,1,3,1.85,0.83,26.53,18.73,2.36,15.5,87.19,93.33,148.88,3.77,5.28,0.22,19.73,101.27,0.87,Humid
3,2024,1,4,2.0,0.69,25.66,19.66,9.41,16.36,90.56,93.33,148.69,3.96,5.44,0.2,20.61,101.27,0.77,Rainy
4,2024,1,5,1.81,0.81,26.27,20.23,8.47,16.66,90.06,93.28,121.56,3.52,5.33,0.27,20.92,101.21,0.86,Rainy


In [201]:
df.columns

Index(['YEAR', 'MO', 'DY', 'ALLSKY_SFC_UV_INDEX', 'WS10M', 'T2M_MAX',
       'T2M_MIN', 'PRECTOTCORR', 'QV2M', 'RH2M', 'PS', 'WD10M',
       'ALLSKY_SFC_SW_DWN', 'CLRSKY_SFC_SW_DWN', 'WS2M', 'T2MDEW', 'PSC',
       'WSC', 'Climate_Condition'],
      dtype='object')

In [202]:
# Features (input)
X = df[['YEAR', 'MO', 'DY', 'ALLSKY_SFC_UV_INDEX', 'WS10M', 'T2M_MAX',
       'T2M_MIN', 'PRECTOTCORR', 'QV2M', 'RH2M', 'PS', 'WD10M',
       'ALLSKY_SFC_SW_DWN', 'CLRSKY_SFC_SW_DWN', 'WS2M', 'T2MDEW', 'PSC',
       'WSC', 'Climate_Condition']]

# Target (output) - Climate Condition
y = df['Climate_Condition']


In [203]:
# Initialize the Label Encoder
label_encoder = LabelEncoder()

# Encode the Climate Condition
y_encoded = label_encoder.fit_transform(y)

# Check the encoded labels
print(label_encoder.classes_)  # Displays the categories and their corresponding numeric values


['Cold' 'Hot' 'Humid' 'Mild' 'Rainy']


In [204]:
X = pd.get_dummies(X, drop_first=True)

In [205]:
# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [213]:
X_test.head()

Unnamed: 0,YEAR,MO,DY,ALLSKY_SFC_UV_INDEX,WS10M,T2M_MAX,T2M_MIN,PRECTOTCORR,QV2M,RH2M,PS,WD10M,ALLSKY_SFC_SW_DWN,CLRSKY_SFC_SW_DWN,WS2M,T2MDEW,PSC,WSC,Climate_Condition_Hot,Climate_Condition_Humid,Climate_Condition_Mild,Climate_Condition_Rainy
33,2024,2,3,2.23,0.89,29.08,19.46,0.0,15.75,82.12,93.49,86.0,5.39,5.59,0.16,20.02,101.4,1.12,False,True,False,False
9,2024,1,10,2.18,1.69,23.23,18.83,0.98,14.4,87.88,93.34,69.5,4.64,5.48,0.34,18.56,101.32,1.9,False,True,False,False
146,2024,5,26,1.72,2.5,26.6,21.52,8.06,17.88,90.0,92.9,275.06,3.96,6.22,0.6,21.93,100.76,2.51,False,False,False,True
282,2024,10,9,1.23,1.38,25.97,20.21,12.88,16.48,90.69,93.14,199.19,4.94,6.73,0.45,20.69,101.06,1.46,False,False,False,True
261,2024,9,18,1.23,2.09,26.23,20.87,6.32,16.85,90.06,93.05,286.81,6.68,6.73,0.5,21.05,100.95,2.2,False,False,False,True


In [206]:
print(len(X_train))
print(len(y_train))
print(len(X_test))
print(len(y_test))


235
235
59
59


### 1.KNN

In [207]:
from sklearn.neighbors import KNeighborsClassifier

# Initialize the KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)

# Train the model
knn_model.fit(X_train, y_train)

# Predict on the test set
y_pred_knn = knn_model.predict(X_test)

# Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, y_pred_knn) * 100:.2f}%")
print(classification_report(y_test, y_pred_knn, target_names=label_encoder.classes_))


Accuracy: 81.36%
              precision    recall  f1-score   support

        Cold       0.00      0.00      0.00         3
         Hot       0.90      0.90      0.90        20
       Humid       0.67      0.60      0.63        10
        Mild       0.43      0.75      0.55         4
       Rainy       0.91      0.95      0.93        22

    accuracy                           0.81        59
   macro avg       0.58      0.64      0.60        59
weighted avg       0.79      0.81      0.80        59



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### 2.Logistic Regression


In [208]:
# Initialize the Logistic Regression model
model = LogisticRegression(max_iter=1000)

# Train the model on the training data
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Generate a detailed classification report
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


Accuracy: 93.22%
              precision    recall  f1-score   support

        Cold       0.00      0.00      0.00         3
         Hot       1.00      1.00      1.00        20
       Humid       0.82      0.90      0.86        10
        Mild       1.00      1.00      1.00         4
       Rainy       0.92      1.00      0.96        22

    accuracy                           0.93        59
   macro avg       0.75      0.78      0.76        59
weighted avg       0.89      0.93      0.91        59



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### 3.Random Forest

In [231]:
from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Predict on the test set
y_pred_rf = rf_model.predict(X_test)
print(y_pred_rf)

# Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf) * 100:.2f}%")
print(classification_report(y_test, y_pred_rf, target_names=label_encoder.classes_))


[2 2 4 4 4 1 1 4 1 3 1 1 2 1 1 4 1 4 4 2 4 4 2 1 2 1 1 4 4 1 4 1 2 4 3 1 1
 4 0 0 1 4 3 1 4 4 4 0 2 2 4 2 4 1 1 4 3 1 4]
Accuracy: 100.00%
              precision    recall  f1-score   support

        Cold       1.00      1.00      1.00         3
         Hot       1.00      1.00      1.00        20
       Humid       1.00      1.00      1.00        10
        Mild       1.00      1.00      1.00         4
       Rainy       1.00      1.00      1.00        22

    accuracy                           1.00        59
   macro avg       1.00      1.00      1.00        59
weighted avg       1.00      1.00      1.00        59



In [232]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Example dataset preparation
# df should be your actual data containing historical climate data
# df = pd.read_csv('your_data.csv')

# Sample dataframe structure for training
# Here we create a mock dataset for illustration purposes
data = {
    'YEAR': [2023, 2023, 2023, 2023],
    'MO': [1, 1, 1, 1],
    'DY': [1, 2, 3, 4],
    'ALLSKY_SFC_UV_INDEX': [5, 10, 15, 20],
    'WS10M': [10, 12, 14, 16],
    'T2M_MAX': [25, 26, 27, 28],
    'T2M_MIN': [15, 16, 17, 18],
    'PRECTOTCORR': [0.1, 0.2, 0.3, 0.4],
    'QV2M': [0.008, 0.009, 0.010, 0.011],
    'RH2M': [70, 75, 80, 85],
    'PS': [1000, 1005, 1010, 1015],
    'WD10M': [90, 85, 80, 75],
    'ALLSKY_SFC_SW_DWN': [200, 210, 220, 230],
    'CLRSKY_SFC_SW_DWN': [220, 230, 240, 250],
    'WS2M': [5, 6, 7, 8],
    'T2MDEW': [14, 15, 16, 17],
    'PSC': [1, 1, 1, 1],
    'WSC': [5, 6, 7, 8],
    'Climate_Condition': ['Hot', 'Humid', 'Cold', 'Mild']
}

df = pd.DataFrame(data)

# Encode the target variable
label_encoder = LabelEncoder()
df['Climate_Condition'] = label_encoder.fit_transform(df['Climate_Condition'])

# Split features and target variable
X = df[['YEAR', 'MO', 'DY', 'ALLSKY_SFC_UV_INDEX', 'WS10M', 'T2M_MAX',
         'T2M_MIN', 'PRECTOTCORR', 'QV2M', 'RH2M', 'PS', 'WD10M',
         'ALLSKY_SFC_SW_DWN', 'CLRSKY_SFC_SW_DWN', 'WS2M', 'T2MDEW', 'PSC', 'WSC']]
y = df['Climate_Condition']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Example user input
user_input = {
    'YEAR': 2024,
    'MO': 10,
    'DY': 18,
    'ALLSKY_SFC_UV_INDEX': 5,
    'WS10M': 12,
    'T2M_MAX': 25,
    'T2M_MIN': 15,
    'PRECTOTCORR': 0.2,
    'QV2M': 0.008,
    'RH2M': 78,
    'PS': 1008,
    'WD10M': 85,
    'ALLSKY_SFC_SW_DWN': 210,
    'CLRSKY_SFC_SW_DWN': 230,
    'WS2M': 9,
    'T2MDEW': 17,
    'PSC': 1,
    'WSC': 6
}

# Create DataFrame for the user input
df_input = pd.DataFrame([user_input])

# Make the prediction
prediction = rf_model.predict(df_input)

# Decode the predicted label
predicted_condition = label_encoder.inverse_transform(prediction)

print(f'Predicted Climate Condition: {predicted_condition[0]}')


Predicted Climate Condition: Hot
