______________________________________________________________________________________________________________________________________
3. Machine Learning Models
Classification Task:
   - Apply at least three different classification algorithms (e.g., Logistic Regression, Decision Trees, Random Forest) to the `weather_classification_data.csv` dataset, and explain why you chose those models.
   - Train the models on the training set and evaluate their performance on the test set using appropriate evaluation metrics.
______________________________________________________________________________________________________________________________________


In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split



# Read the weather classification CSV into a DataFrame.
# NOTE(review): `weather_file_path` is not assigned anywhere in this cell; it is
# presumably defined in an earlier cell -- confirm it exists before "Run All".
weather_df = pd.read_csv(weather_file_path)

# Preview the first rows underneath a banner line.
print("--------Weather Classfication data--------", weather_df.head(), sep="\n")

In [2]:

def Drop_Missing_value(weather_df):
    """Return a new DataFrame without the rows that contain a missing value.

    Parameters
    ----------
    weather_df : pandas.DataFrame
        Frame to clean. It is not modified; ``dropna`` returns a new object.

    Returns
    -------
    pandas.DataFrame
        The rows of ``weather_df`` in which every column is populated.
    """
    # axis=0 / how="any" are the pandas defaults, spelled out for the reader.
    complete_rows = weather_df.dropna(axis=0, how="any")
    return complete_rows

# Remove incomplete rows before encoding.
weather_df = Drop_Missing_value(weather_df)

# One-hot encode the categorical predictors in a single call; the dummy columns
# are appended in the listed order (Cloud Cover, Season, Location).
# 'Weather Type' is deliberately left un-encoded: it is used as the target label.
weather_df = pd.get_dummies(weather_df, columns=['Cloud Cover', 'Season', 'Location'])
print(weather_df.head())

# Row count after cleaning.
num_rows_weather = len(weather_df)
print(f"Number of rows in weather_df: {num_rows_weather}")

In [3]:
# Separate the predictors (every column except the label) from the target label.
X = weather_df.drop(columns='Weather Type')
Y = weather_df['Weather Type']

# Hold out 20% of the rows for testing. The split is seeded so that the train/test
# partition -- and therefore every metric reported below -- is identical on each
# "Restart & Run All"; 42 matches the seed already used by the tree-based models.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=42)

# Report the size of each partition.
print("------Weather Classification data---------\n")
num_rows_train_weather = Xtrain.shape[0]
print(f"Number of rows of Trainig of Weather data: {num_rows_train_weather}")
num_rows_test_weather = Xtest.shape[0]
print(f"Number of rows of Testing of Weather data: {num_rows_test_weather}")

______________________________________________________________________________________________________________________________________
Logistic Regression Model                 
______________________________________________________________________________________________________________________________________

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score

# Standardise the features: the scaler is fitted on the training split only and
# the same transformation is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(Xtrain)
X_test_scaled = scaler.transform(Xtest)

# Fit the classifier (fit() returns the estimator itself), then predict the
# labels of the held-out test split.
lr_model = LogisticRegression(max_iter=20000).fit(X_train_scaled, Ytrain)
y_pred_lr = lr_model.predict(X_test_scaled)

# Report overall accuracy and the per-class breakdown.
accuracy_log_reg = accuracy_score(Ytest, y_pred_lr)
print("Logistic Regression")
print("Accuracy:", accuracy_log_reg)
print("Classification Report:\n", classification_report(Ytest, y_pred_lr))

# Support-weighted averages, kept for the model comparison table.
precision_log_reg, recall_log_reg, f1_log_reg = (
    scorer(Ytest, y_pred_lr, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)



______________________________________________________________________________________________________________________________________
Decision Tree          
______________________________________________________________________________________________________________________________________

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
# Decision tree wrapped in a single-step pipeline (there is no preprocessing step);
# the fixed seed keeps the fitted tree reproducible.
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_pipeline = Pipeline([('classifier', dt_classifier)])

# Fit on the training split, then predict the held-out test split.
dt_pipeline.fit(Xtrain, Ytrain)
y_pred_dt = dt_pipeline.predict(Xtest)

# Report overall accuracy and the per-class breakdown.
accuracy_dt = accuracy_score(Ytest, y_pred_dt)
print(f"Decision Tree Accuracy: {accuracy_dt}")
print(classification_report(Ytest, y_pred_dt))

# Support-weighted averages, kept for the model comparison table.
weighted = dict(average='weighted')
precision_dt = precision_score(Ytest, y_pred_dt, **weighted)
recall_dt = recall_score(Ytest, y_pred_dt, **weighted)
f1_dt = f1_score(Ytest, y_pred_dt, **weighted)


______________________________________________________________________________________________________________________________________
Random Forest                
______________________________________________________________________________________________________________________________________

In [6]:
from sklearn.ensemble import RandomForestClassifier

# Random forest wrapped in a single-step pipeline (there is no preprocessing step);
# the fixed seed keeps the fitted ensemble reproducible.
rf_steps = [('classifier', RandomForestClassifier(random_state=42))]
rf_pipeline = Pipeline(steps=rf_steps)

# Fit on the training split, then predict the held-out test split.
rf_pipeline.fit(Xtrain, Ytrain)
y_pred_rf = rf_pipeline.predict(Xtest)

# Report overall accuracy and the per-class breakdown.
accuracy_rf = accuracy_score(Ytest, y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf}")
print(classification_report(Ytest, y_pred_rf))

# Support-weighted averages, kept for the model comparison table.
precision_rf = precision_score(y_true=Ytest, y_pred=y_pred_rf, average='weighted')
recall_rf = recall_score(y_true=Ytest, y_pred=y_pred_rf, average='weighted')
f1_rf = f1_score(y_true=Ytest, y_pred=y_pred_rf, average='weighted')

______________________________________________________________________________________________________________________________________
4. Model Evaluation and Comparison
  1. Model Performance Comparison:
   - Compare the performance of different models for classification tasks.
   - Create tables or visualizations to summarize the performance metrics of each model.
   - Discuss which model performed the best for each task and provide possible reasons for its superior performance.
______________________________________________________________________________________________________________________________________

In [7]:
# One row per model: name followed by its test-set accuracy and the
# support-weighted precision, recall and F1 computed in the cells above.
summary_rows = [
    ['Logistic Regression', accuracy_log_reg, precision_log_reg, recall_log_reg, f1_log_reg],
    ['Decision Tree', accuracy_dt, precision_dt, recall_dt, f1_dt],
    ['Random Forest', accuracy_rf, precision_rf, recall_rf, f1_rf],
]
summary_df = pd.DataFrame(
    summary_rows,
    columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score'],
)

print(summary_df)

In [8]:
import matplotlib.pyplot as plt

# Bar chart of test accuracy, one bar per model.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(summary_df['Model'], summary_df['Accuracy'], color=['blue', 'green', 'red'])
ax.set_xlabel('Model')
ax.set_ylabel('Accuracy')
ax.set_title('Model Accuracy Comparison')
ax.set_ylim([0, 1])

# Label each bar with its accuracy (two decimals), slightly above the bar top.
for position, accuracy in enumerate(summary_df['Accuracy']):
    ax.text(position, accuracy + 0.01, f"{accuracy:.2f}", ha='center')
plt.show()
