In [1]:
import pandas as  pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
df=pd.read_csv(r'seattle-weather.csv')
df.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [3]:
df.tail()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
1456,2015-12-27,8.6,4.4,1.7,2.9,rain
1457,2015-12-28,1.5,5.0,1.7,1.3,rain
1458,2015-12-29,0.0,7.2,0.6,2.6,fog
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun
1460,2015-12-31,0.0,5.6,-2.1,3.5,sun


In [4]:
df.shape

(1461, 6)

In [5]:
df.isnull().sum()

date             0
precipitation    0
temp_max         0
temp_min         0
wind             0
weather          0
dtype: int64

In [6]:
df.isnull().sum().sum()

0

In [7]:
df.drop('date',axis=1,inplace=True)

In [8]:
df.head()

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather
0,0.0,12.8,5.0,4.7,drizzle
1,10.9,10.6,2.8,4.5,rain
2,0.8,11.7,7.2,2.3,rain
3,20.3,12.2,5.6,4.7,rain
4,1.3,8.9,2.8,6.1,rain


In [9]:
df.tail()

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather
1456,8.6,4.4,1.7,2.9,rain
1457,1.5,5.0,1.7,1.3,rain
1458,0.0,7.2,0.6,2.6,fog
1459,0.0,5.6,-1.0,3.4,sun
1460,0.0,5.6,-2.1,3.5,sun


In [10]:
df.weather.value_counts()

weather
rain       641
sun        640
fog        101
drizzle     53
snow        26
Name: count, dtype: int64

In [11]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['weather'] = le.fit_transform(df['weather'])

In [12]:
df.weather.value_counts()

weather
2    641
4    640
1    101
0     53
3     26
Name: count, dtype: int64

In [13]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.pipeline import Pipeline

#Preprocess Data for Machine Learning Development
X = df.drop(['weather'], axis = 1)
y = df['weather']


over_strategy = {0 : 1000, 1 : 1000, 2 : 1000, 3 : 1000, 4 : 2000}
under_strategy = {0 : 1000, 1 : 1000, 2 : 1000, 3 : 1000, 4 : 2000}

oversample = SMOTE(sampling_strategy = over_strategy)
undersample = RandomUnderSampler(sampling_strategy = under_strategy)

X_final,y = oversample.fit_resample(X,y)
X_final,y = undersample.fit_resample(X_final, y)


X_train,X_test,y_train,y_test = train_test_split(X_final,y,random_state = 10, test_size = 0.2)

In [14]:
X=df.drop('weather',axis=1)
X

Unnamed: 0,precipitation,temp_max,temp_min,wind
0,0.0,12.8,5.0,4.7
1,10.9,10.6,2.8,4.5
2,0.8,11.7,7.2,2.3
3,20.3,12.2,5.6,4.7
4,1.3,8.9,2.8,6.1
...,...,...,...,...
1456,8.6,4.4,1.7,2.9
1457,1.5,5.0,1.7,1.3
1458,0.0,7.2,0.6,2.6
1459,0.0,5.6,-1.0,3.4


In [15]:
y=df['weather']
y

0       0
1       2
2       2
3       2
4       2
       ..
1456    2
1457    2
1458    1
1459    4
1460    4
Name: weather, Length: 1461, dtype: int32

In [16]:
X_train

Unnamed: 0,precipitation,temp_max,temp_min,wind
3454,0.954209,22.123312,14.769103,4.261517
3385,2.963411,16.998758,12.488989,4.908526
4338,19.297041,9.373891,2.634632,4.250927
1369,0.000000,21.100000,9.400000,1.300000
4590,3.832242,3.949178,-4.163387,5.013113
...,...,...,...,...
2434,0.000000,25.000000,14.535499,3.600000
3888,8.549605,8.478261,1.646640,5.038340
2912,0.000000,21.100000,12.897886,3.082779
36,0.000000,16.100000,1.700000,5.000000


In [17]:
y_train

3454    2
3385    2
4338    3
1369    1
4590    3
       ..
2434    1
3888    3
2912    1
36      4
3039    1
Name: weather, Length: 4800, dtype: int32

In [18]:
X_test

Unnamed: 0,precipitation,temp_max,temp_min,wind
3212,0.000000,8.783812,2.316188,3.968051
653,0.000000,15.600000,5.000000,0.900000
2773,0.000000,14.597786,4.400000,1.948340
3931,13.500000,5.938568,1.974741,3.797951
1225,0.000000,19.400000,11.100000,2.800000
...,...,...,...,...
3348,1.350227,11.100000,4.176246,3.824660
856,0.000000,16.700000,8.300000,2.600000
4399,8.476532,8.725586,1.736577,5.015856
2929,0.000000,7.693573,0.404884,2.451670


In [19]:
y_test

3212    1
653     4
2773    1
3931    3
1225    4
       ..
3348    2
856     4
4399    3
2929    1
4453    3
Name: weather, Length: 1200, dtype: int32

In [20]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier 
dt = DecisionTreeClassifier(random_state=12345)
dt = dt.fit(X_train,y_train)
y_pred = dt.predict(X_test)
acc=accuracy_score(y_test,y_pred)
acc

0.8625

In [21]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
np.random.seed(10)
gnb.fit(X_train,y_train)
y_pred = gnb.predict(X_test)
acc=accuracy_score(y_test,y_pred)
acc

0.6483333333333333

In [22]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=52)
rf = rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)
acc=accuracy_score(y_test,y_pred)
acc

0.9016666666666666

In [23]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr = lr.fit(X_train,y_train)
y_pred = lr.predict(X_test)
acc=accuracy_score(y_test,y_pred)
acc

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.6358333333333334

In [24]:
abc=[0.0,	27.8,	13.3,	3.3]
result=lr.predict([abc])
result=result[0]
if result==0:
    print("Drizzle")
elif result==1:
    print("Fog")
elif result==2:
    print("Rain")
elif result==3:
    print("Snow")
else:
    print("Sun")

Sun




In [25]:
abc=[1.5,	8.9,	3.3,	3.8]
result=dt.predict([abc])
result=result[0]
if result==0:
    print("Drizzle")
elif result==1:
    print("Fog")
elif result==2:
    print("Rain")
elif result==3:
    print("Snow")
else:
    print("Sun")

Rain




In [26]:
abc=[0.0,	19.4,	9.4,	2.0]
result=dt.predict([abc])
result=result[0]
if result==0:
    print("Drizzle")
elif result==1:
    print("Fog")
elif result==2:
    print("Rain")
elif result==3:
    print("Snow")
else:
    print("Sun")

Drizzle




In [27]:
abc=[0.000000,	20.979252,	9.442617,	1.307103]
result=dt.predict([abc])
result=result[0]
if result==0:
    print("Drizzle")
elif result==1:
    print("Fog")
elif result==2:
    print("Rain")
elif result==3:
    print("Snow")
else:
    print("Sun")

Fog




In [28]:
abc=[3.558850,	4.298997,	-4.243887,	5.288777]
result=dt.predict([abc])
result=result[0]
if result==0:
    print("Drizzle")
elif result==1:
    print("Fog")
elif result==2:
    print("Rain")
elif result==3:
    print("Snow")
else:
    print("Sun")

Snow


