# Rain Predictions

Tutorial : [Akshit Madan](https://www.kaggle.com/akshitmadan)

## Import modules

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Apply the 'dark_background' matplotlib style to the figures drawn in this notebook.
plt.style.use('dark_background')

In [None]:
# Load the weatherAUS CSV from the Kaggle input directory into a DataFrame.
DATA_PATH = '/kaggle/input/weather-dataset-rattle-package/weatherAUS.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded data.
df.head()

In [None]:
# Summary statistics for the DataFrame's columns.
df.describe()

In [None]:
# (rows, columns) of the loaded data.
df.shape

In [None]:
# Column dtypes and non-null counts; useful for spotting columns with missing values.
df.info()

In [None]:
# Discard the columns that are not used in the rest of this notebook.
unused_columns = ['Evaporation', 'Sunshine', 'Cloud9am', 'Cloud3pm', 'Location', 'Date']
df = df.drop(columns=unused_columns)

In [None]:
# Preview the first rows again to check which columns remain.
df.head()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Keep only the rows in which no column is missing.
df = df.dropna(axis='index', how='any')

In [None]:
# (rows, columns) at this point in the notebook.
df.shape

In [None]:
# Names of the columns currently in the DataFrame.
df.columns

## Label Encoder

In [None]:
from sklearn.preprocessing import LabelEncoder

# String-valued columns that are replaced by integer codes before modelling.
categorical_columns = ['WindGustDir', 'WindDir9am', 'WindDir3pm', 'RainToday', 'RainTomorrow']

# Fit one encoder per column and keep it, so each column's codes can still be
# mapped back to the original labels with encoders[column].inverse_transform(...).
# Re-fitting a single shared encoder would discard every mapping except the last.
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# `le` is kept for backward compatibility: as before, it ends up fitted on 'RainTomorrow'.
le = encoders['RainTomorrow']

In [None]:
# Separate the target column from the feature matrix.
target_column = 'RainTomorrow'
y = df[target_column]
X = df.drop(columns=[target_column])

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# MinTemp against MaxTemp, with points coloured by the RainTomorrow column.
plt.figure(figsize=(8, 8))
sns.scatterplot(
    data=df,
    x='MaxTemp',
    y='MinTemp',
    hue='RainTomorrow',
    palette='inferno',
)

In [None]:
# Temp9am against Humidity9am, with points coloured by the RainTomorrow column.
plt.figure(figsize=(8, 8))
sns.scatterplot(
    data=df,
    x='Humidity9am',
    y='Temp9am',
    hue='RainTomorrow',
    palette='inferno',
)

## Heatmap

In [None]:
# Pairwise correlations between the DataFrame's columns, drawn as a heatmap.
correlations = df.corr()
plt.figure(figsize=(8, 8))
sns.heatmap(correlations)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so the scores reported below are reproducible
# between runs; stratify=y keeps the RainTomorrow class ratio the same in the
# training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## Predictions score modules

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression (liblinear solver) and evaluate it on the test set.
lr_model = LogisticRegression(solver='liblinear')
lr_model.fit(X_train, y_train)
predictions = lr_model.predict(X_test)

# Compute the accuracy once and reuse it for both the report and the final comparison.
lr_score = accuracy_score(y_test, predictions)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))
print(lr_score)

### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree and evaluate it on the test set.
# random_state fixes the tree's internal randomness so the score is reproducible.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
predictions = dt_model.predict(X_test)

# Compute the accuracy once and reuse it for both the report and the final comparison.
dt_score = accuracy_score(y_test, predictions)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))
print(dt_score)

### Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest and evaluate it on the test set.
# random_state fixes the bootstrap sampling and feature selection so the score
# is reproducible between runs.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
predictions = rf_model.predict(X_test)

# Compute the accuracy once and reuse it for both the report and the final comparison.
rf_score = accuracy_score(y_test, predictions)
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))
print(rf_score)

### XGBoost Classifier

In [None]:
import xgboost as xgb

# Fit an XGBoost classifier with default settings and evaluate it on the test set.
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train)
pred = xgb_model.predict(X_test)

# Compute the accuracy once and reuse it for both the report and the final comparison.
xgb_score = accuracy_score(y_test, pred)
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))
print(xgb_score)

## Visualize the accuracy score

In [None]:
# Pair each label with its accuracy, then split into the two parallel lists
# (same order in both) that the following cells consume.
score_by_algorithm = {
    'lr_score': lr_score,
    'dt_score': dt_score,
    'rf_score': rf_score,
    'xgb_score': xgb_score,
}
algorithm = list(score_by_algorithm)
scores = list(score_by_algorithm.values())

In [None]:
# Two-row table: labels first, accuracy scores second.
data = [algorithm, scores]

In [None]:
# `data` holds two parallel lists: the algorithm labels and their accuracy scores.
algorithm_names, accuracy_values = data

# Build the table directly with the labels as the index. Constructing it from the
# mixed list and transposing would leave 'score' as an object-dtype column; this
# keeps it numeric and avoids the in-place set_index.
df_visualize = pd.DataFrame(
    {'score': accuracy_values},
    index=pd.Index(algorithm_names, name='algorithm'),
)

In [None]:
# Display the table of accuracy scores per algorithm.
df_visualize

In [None]:
# The algorithms are unordered categories, so draw one bar per model; a line
# plot would suggest a trend between unrelated models.
df_visualize.plot(kind='bar', figsize=(8,5), rot=0)

From the graph above, we can see that XGBoost achieves a higher accuracy score than the other models.

Thanks!