In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [9]:
df = pd.read_csv("data.csv")
X = df[['temp', 'RH', 'wind', 'rain']]
y = df['area']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
import pickle


def evaluate_model(model,curr_acc):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    
    print(f"Model: {model.__class__.__name__}")
    accuracy =  accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
    print("-" * 50)

    if curr_acc < accuracy:
        curr_acc = accuracy
        print(f"Best Model: {model.__class__.__name__}")
        with open("model.pkl", "wb") as file:
            pickle.dump(model, file)
    return curr_acc


In [13]:
best_acc = 0

In [14]:
from sklearn.linear_model import LogisticRegression
best_acc = evaluate_model(LogisticRegression(),best_acc)

Model: LogisticRegression
Accuracy: 0.5192307692307693
              precision    recall  f1-score   support

           0       0.52      0.29      0.38        51
           1       0.52      0.74      0.61        53

    accuracy                           0.52       104
   macro avg       0.52      0.51      0.49       104
weighted avg       0.52      0.52      0.49       104

[[15 36]
 [14 39]]
--------------------------------------------------
Best Model: LogisticRegression


In [15]:
from sklearn.neighbors import KNeighborsClassifier
best_acc=evaluate_model(KNeighborsClassifier(n_neighbors=5),best_acc)

Model: KNeighborsClassifier
Accuracy: 0.5673076923076923
              precision    recall  f1-score   support

           0       0.56      0.55      0.55        51
           1       0.57      0.58      0.58        53

    accuracy                           0.57       104
   macro avg       0.57      0.57      0.57       104
weighted avg       0.57      0.57      0.57       104

[[28 23]
 [22 31]]
--------------------------------------------------
Best Model: KNeighborsClassifier


In [16]:
from sklearn.svm import SVC
best_acc=evaluate_model(SVC(kernel='linear'),best_acc)

Model: SVC
Accuracy: 0.5096153846153846
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        51
           1       0.51      1.00      0.68        53

    accuracy                           0.51       104
   macro avg       0.25      0.50      0.34       104
weighted avg       0.26      0.51      0.34       104

[[ 0 51]
 [ 0 53]]
--------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
from sklearn.tree import DecisionTreeClassifier
best_acc=evaluate_model(DecisionTreeClassifier(),best_acc)

Model: DecisionTreeClassifier
Accuracy: 0.5673076923076923
              precision    recall  f1-score   support

           0       0.56      0.59      0.57        51
           1       0.58      0.55      0.56        53

    accuracy                           0.57       104
   macro avg       0.57      0.57      0.57       104
weighted avg       0.57      0.57      0.57       104

[[30 21]
 [24 29]]
--------------------------------------------------


In [18]:
from sklearn.ensemble import RandomForestClassifier
best_acc=evaluate_model(RandomForestClassifier(n_estimators=100),best_acc)

Model: RandomForestClassifier
Accuracy: 0.6538461538461539
              precision    recall  f1-score   support

           0       0.65      0.63      0.64        51
           1       0.65      0.68      0.67        53

    accuracy                           0.65       104
   macro avg       0.65      0.65      0.65       104
weighted avg       0.65      0.65      0.65       104

[[32 19]
 [17 36]]
--------------------------------------------------
Best Model: RandomForestClassifier


In [19]:
from xgboost import XGBClassifier
best_acc=evaluate_model(XGBClassifier(use_label_encoder=False, eval_metric='logloss'),best_acc)

Parameters: { "use_label_encoder" } are not used.



Model: XGBClassifier
Accuracy: 0.5480769230769231
              precision    recall  f1-score   support

           0       0.54      0.59      0.56        51
           1       0.56      0.51      0.53        53

    accuracy                           0.55       104
   macro avg       0.55      0.55      0.55       104
weighted avg       0.55      0.55      0.55       104

[[30 21]
 [26 27]]
--------------------------------------------------


In [20]:
from sklearn.naive_bayes import GaussianNB
best_acc=evaluate_model(GaussianNB(),best_acc)

Model: GaussianNB
Accuracy: 0.49038461538461536
              precision    recall  f1-score   support

           0       0.49      0.98      0.65        51
           1       0.50      0.02      0.04        53

    accuracy                           0.49       104
   macro avg       0.50      0.50      0.34       104
weighted avg       0.50      0.49      0.34       104

[[50  1]
 [52  1]]
--------------------------------------------------


In [21]:
from sklearn.neural_network import MLPClassifier
best_acc=evaluate_model(MLPClassifier(hidden_layer_sizes=(50,50), max_iter=500),best_acc)

Model: MLPClassifier
Accuracy: 0.5480769230769231
              precision    recall  f1-score   support

           0       0.56      0.39      0.46        51
           1       0.54      0.70      0.61        53

    accuracy                           0.55       104
   macro avg       0.55      0.55      0.54       104
weighted avg       0.55      0.55      0.54       104

[[20 31]
 [16 37]]
--------------------------------------------------
