In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
df = pd.read_csv('/Users/sirajussalekin/Downloads/weatherAUS.csv')
df.head()

In [None]:
df.describe()

# Rows and columns number

In [None]:
df.shape

# Null and Non-null values in the features

In [None]:
df.info()

# Dropping Columns With Highest Null Values And As It Is Not A Time Series Analysis, Dropping The Date Column 

In [None]:
df = df.drop(["Evaporation","Sunshine","Cloud9am","Cloud3pm","Location", "Date"], axis =1)
df.head()

# Dropping all the Null Value of Rows

In [None]:
df = df.dropna(axis = 0)
df.shape

In [None]:
df.columns

# Transforming the Categorical Columns Into Numerical Columns

In [None]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['WindGustDir'] = le.fit_transform(df['WindGustDir'])
df['WindDir9am'] = le.fit_transform(df['WindDir9am'])
df['WindDir3pm'] = le.fit_transform(df['WindDir3pm'])
df['RainToday'] = le.fit_transform(df['RainToday'])
df['RainTomorrow'] = le.fit_transform(df['RainTomorrow'])

# Separating Feature Columns And Class Column

In [None]:
x = df.drop(['RainTomorrow'], axis = 1)
y = df['RainTomorrow']

In [None]:
x.head()

# The Rain is increasing with high temp

In [None]:
plt.figure(figsize = (8,8))
sns.scatterplot(x = 'MaxTemp', y = 'MinTemp', hue = 'RainTomorrow' , palette = 'inferno',data = df)

# With High Humidity, Rain increases

In [None]:
plt.figure(figsize = (8,8))
sns.scatterplot(x = 'Humidity9am', y = 'Temp9am', hue = 'RainTomorrow' , palette = 'inferno',data = df)

# Correlation Heatmap

In [None]:
plt.figure(figsize = (8,8))
sns.heatmap(df.corr())

# Train and Test Data Split 

In [None]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Logistic Regression

In [27]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(x_train,y_train)
predictions = lr.predict(x_test)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[16635   924]
 [ 2631  2395]]
              precision    recall  f1-score   support

           0       0.86      0.95      0.90     17559
           1       0.72      0.48      0.57      5026

    accuracy                           0.84     22585
   macro avg       0.79      0.71      0.74     22585
weighted avg       0.83      0.84      0.83     22585

0.8425946424618109


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Decision Tree Classifier With Approximately 79% Accuracy

In [None]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(x_train,y_train)
predictions = dt.predict(x_test)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))

# Random Forest Classifier With Approximately 85% Accuracy

In [None]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier()
rf.fit(x_train,y_train)
predictions = rf.predict(x_test)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))

# Xgboost Classifier With 86% Accuracy

In [None]:
!pip install xgboost

In [26]:
import xgboost as xgb
xgb = xgb.XGBClassifier()
xgb.fit(x_train, y_train)
pred = xgb.predict(x_test)
print('acc',accuracy_score(y_test,pred))
print('f1',classification_report(y_test,pred))
print('matrix',confusion_matrix(y_test,pred))

acc 0.8551693601948196
f1               precision    recall  f1-score   support

           0       0.88      0.95      0.91     17559
           1       0.74      0.54      0.62      5026

    accuracy                           0.86     22585
   macro avg       0.81      0.74      0.77     22585
weighted avg       0.85      0.86      0.85     22585

matrix [[16596   963]
 [ 2308  2718]]


# Adaboost Classifier With 84% Accuracy

In [36]:
from sklearn.ensemble import AdaBoostClassifier
abc = AdaBoostClassifier(n_estimators=50,learning_rate=1).fit(x_train,y_train)
y_abc_predict = abc.predict(x_test)
print('acc',accuracy_score(y_test,y_abc_predict))
print('f1',classification_report(y_test,y_abc_predict))
print('matrix',confusion_matrix(y_test,y_abc_predict))

acc 0.8435244631392517
f1               precision    recall  f1-score   support

           0       0.87      0.95      0.90     17559
           1       0.72      0.49      0.58      5026

    accuracy                           0.84     22585
   macro avg       0.79      0.72      0.74     22585
weighted avg       0.83      0.84      0.83     22585

matrix [[16607   952]
 [ 2582  2444]]
