In [1]:
# Import all necessary laibrary.
import matplotlib.pyplot as plt
import scipy
import re
from scipy import stats
from scipy.stats import ttest_ind
from scipy.stats import pearsonr
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import pandas as pd


In [2]:
# Read the data set.
wh=pd.read_csv('seattle-weather.csv')
wh=pd.DataFrame(wh)
wh.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [3]:
# Checking the shape of the data.
wh.shape

(1461, 6)

In [4]:
# More info about the data.
wh.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           1461 non-null   object 
 1   precipitation  1461 non-null   float64
 2   temp_max       1461 non-null   float64
 3   temp_min       1461 non-null   float64
 4   wind           1461 non-null   float64
 5   weather        1461 non-null   object 
dtypes: float64(4), object(2)
memory usage: 68.6+ KB


In [5]:
# Check null valuess on the data set.
wh.isnull().sum()

date             0
precipitation    0
temp_max         0
temp_min         0
wind             0
weather          0
dtype: int64

In [6]:
# Remove Unwanted Column.
wh.drop(['date'],axis=1,inplace=True)

In [7]:
# Read the Data.
wh.head()

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather
0,0.0,12.8,5.0,4.7,drizzle
1,10.9,10.6,2.8,4.5,rain
2,0.8,11.7,7.2,2.3,rain
3,20.3,12.2,5.6,4.7,rain
4,1.3,8.9,2.8,6.1,rain


In [8]:
# Read last 5 rows of the Data.
wh.tail()

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather
1456,8.6,4.4,1.7,2.9,rain
1457,1.5,5.0,1.7,1.3,rain
1458,0.0,7.2,0.6,2.6,fog
1459,0.0,5.6,-1.0,3.4,sun
1460,0.0,5.6,-2.1,3.5,sun


In [9]:
# Create labelncoder and save in a variable.
lc=LabelEncoder()

In [10]:
# Transform the str data to numeric with the help of label encoder.
wh['Weather_N']=lc.fit_transform(wh['weather'])

In [11]:
# Read the first 5 rows of the data.
wh.head()

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather,Weather_N
0,0.0,12.8,5.0,4.7,drizzle,0
1,10.9,10.6,2.8,4.5,rain,2
2,0.8,11.7,7.2,2.3,rain,2
3,20.3,12.2,5.6,4.7,rain,2
4,1.3,8.9,2.8,6.1,rain,2


In [12]:
# Drop the str column.
wh.drop(['weather'],axis=1,inplace=True)

In [13]:
# Once again check the data.
wh.head()

Unnamed: 0,precipitation,temp_max,temp_min,wind,Weather_N
0,0.0,12.8,5.0,4.7,0
1,10.9,10.6,2.8,4.5,2
2,0.8,11.7,7.2,2.3,2
3,20.3,12.2,5.6,4.7,2
4,1.3,8.9,2.8,6.1,2


In [14]:
# Check the unique value of 'weather_N' column.
wh['Weather_N'].unique()

array([0, 2, 4, 3, 1])

In [15]:
# We can use describe function to see maximum , minimum and average temp.
wh.describe()

Unnamed: 0,precipitation,temp_max,temp_min,wind,Weather_N
count,1461.0,1461.0,1461.0,1461.0,1461.0
mean,3.029432,16.439083,8.234771,3.241136,2.752225
std,6.680194,7.349758,5.023004,1.437825,1.19138
min,0.0,-1.6,-7.1,0.4,0.0
25%,0.0,10.6,4.4,2.2,2.0
50%,0.0,15.6,8.3,3.0,2.0
75%,2.8,22.2,12.2,4.0,4.0
max,55.9,35.6,18.3,9.5,4.0


In [16]:
# Create x and y value for create test and traning set.
x=((wh.loc[:,wh.columns!='Weather_N']).astype(int)).values[:,0:]
y=wh['Weather_N'].values

In [17]:
# Create test and traning data set.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.1,random_state=2)

In [18]:
# KNN Classification and create the accuracy.
knn=KNeighborsClassifier()
knn.fit(x_train,y_train)
print('KNN accuracy:{:.2f}%'.format(knn.score(x_test,y_test)*100))

KNN accuracy:76.87%


In [19]:
# SVM Classification and create the accuracy.
svm=SVC()
svm.fit(x_train,y_train)
print('SVM accuracy:{:.2f}%'.format(svm.score(x_test,y_test)*100))

SVM accuracy:77.55%


In [20]:
# Crate Gradient Boosting Classification and accuracy.
gbc=GradientBoostingClassifier(subsample=0.5,n_estimators=450,max_depth=5,max_leaf_nodes=25)
gbc.fit(x_train,y_train)
print('GBC accuracy:{:.2f}%'.format(gbc.score(x_test,y_test)*100))

GBC accuracy:76.19%


In [21]:
# Crate XGB Classification and accuracy.
import warnings
warnings.filterwarnings('ignore')
xgb=XGBClassifier()
xgb.fit(x_train,y_train)
print('XGB accuracy:{:.2f}%'.format(xgb.score(x_test,y_test)*100))

XGB accuracy:78.23%


In [24]:
# Take a input and make prediction.
input=[[20.5,12.8,5,4.7]]
ot=xgb.predict(input)
print("The weather Would be:" ,end=" ")
if(ot==0):
  print('Drizzle')
elif (ot==1):
  print('Fogg')
elif (ot==2):
  print('Rain')
elif (ot==3):
  print('Snow')
else:
  print('Sun')


The weather Would be: Rain
