In [1]:
# Import Required Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Import Dataset
df=pd.read_csv("seattle-weather.csv")

In [3]:
#Checking head of dataframe
df.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [4]:
#Checking for count of unique values in weather column
df['weather'].value_counts()

rain       641
sun        640
fog        101
drizzle     53
snow        26
Name: weather, dtype: int64

In [5]:
#Shape of dataframe
df.shape

(1461, 6)

In [6]:
#X is our independent variable or features and y is our predicted or dependent variable
#In our features we remove date because that will not help in predicting the dependent variable weather.
#y is weather that we want to predict
y=df['weather']
X=df.drop(['weather','date'],axis=1)

In [7]:
X


Unnamed: 0,precipitation,temp_max,temp_min,wind
0,0.0,12.8,5.0,4.7
1,10.9,10.6,2.8,4.5
2,0.8,11.7,7.2,2.3
3,20.3,12.2,5.6,4.7
4,1.3,8.9,2.8,6.1
...,...,...,...,...
1456,8.6,4.4,1.7,2.9
1457,1.5,5.0,1.7,1.3
1458,0.0,7.2,0.6,2.6
1459,0.0,5.6,-1.0,3.4


In [8]:
#Splitting the dataset into training and testing set
#Here we randomly assigned 70% of dataset to training set and 30% of dataset to testing set

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)

In [9]:
X_train

Unnamed: 0,precipitation,temp_max,temp_min,wind
1452,6.1,5.0,2.8,7.6
762,2.0,7.8,2.8,0.8
932,0.0,23.9,13.3,2.2
435,1.3,10.6,6.1,1.1
629,0.0,21.1,13.3,2.5
...,...,...,...,...
1130,8.4,10.6,4.4,1.7
1294,0.0,33.3,17.8,3.4
860,0.5,15.6,7.2,2.1
1459,0.0,5.6,-1.0,3.4


In [10]:
X_test

Unnamed: 0,precipitation,temp_max,temp_min,wind
892,0.0,23.9,11.1,2.7
1105,5.8,7.8,6.1,0.5
413,0.0,9.4,4.4,3.4
522,0.0,26.7,12.2,2.5
1036,1.8,13.3,7.2,2.9
...,...,...,...,...
323,54.1,13.3,8.3,6.0
650,1.0,14.4,8.9,2.2
439,0.0,14.4,8.9,4.3
798,4.3,15.0,9.4,4.3


In [11]:
y_train

1452    rain
762     rain
932      sun
435     rain
629      sun
        ... 
1130    rain
1294     sun
860     rain
1459     sun
1126     fog
Name: weather, Length: 1022, dtype: object

In [12]:
y_test

892         sun
1105       rain
413        rain
522         sun
1036       rain
         ...   
323        rain
650        rain
439        rain
798        rain
135     drizzle
Name: weather, Length: 439, dtype: object

In [13]:
#Importing and creating the model 
#Here we set max_iter to 1000 to remove the warnings
#By using fit method we train the model on training set

from sklearn.linear_model import LogisticRegression
logistic_model=LogisticRegression(max_iter=1000)
logistic_model.fit(X_train,y_train)


In [14]:
#Here we make predictions from our model

y_pred=logistic_model.predict(X_test)

In [15]:
#We imported metrics to calculate and display all the performance metrics of our classification models

from sklearn.metrics import classification_report,accuracy_score,confusion_matrix

In [21]:
#Classification report which contains precison,recall,f1-score,support

print(classification_report(y_test,y_pred,zero_division=1))

              precision    recall  f1-score   support

     drizzle       1.00      0.00      0.00        14
         fog       1.00      0.00      0.00        32
        rain       0.96      0.92      0.94       192
        snow       1.00      0.12      0.22         8
         sun       0.76      1.00      0.86       193

    accuracy                           0.85       439
   macro avg       0.94      0.41      0.41       439
weighted avg       0.88      0.85      0.80       439



In [22]:
#This state the accuracy for our model

print(accuracy_score(y_test,y_pred))

0.8451025056947609


In [23]:
#This states the confusion matrix for our model

print(confusion_matrix(y_test,y_pred))

[[  0   0   0   0  14]
 [  0   0   0   0  32]
 [  0   0 177   0  15]
 [  0   0   7   1   0]
 [  0   0   0   0 193]]
