In [1]:
#importing all the necessary libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, mean_squared_error
from sklearn.model_selection import GridSearchCV
import PySimpleGUI as sg
import warnings

In [2]:
# Load the Seattle weather observations from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer="Dataset/seattle-weather.csv")

In [3]:
# Preview the first rows of the raw data
df.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [4]:
# Display the raw DataFrame (the notebook shows only the leading and trailing rows)
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015-12-27,8.6,4.4,1.7,2.9,rain
1457,2015-12-28,1.5,5.0,1.7,1.3,rain
1458,2015-12-29,0.0,7.2,0.6,2.6,fog
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun


In [5]:
#the date is not used as a model input here, so the 'date' column is dropped
update_df = df.drop(columns=['date'])

In [6]:
# Inspect the working frame (the one without the 'date' column)
update_df

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather
0,0.0,12.8,5.0,4.7,drizzle
1,10.9,10.6,2.8,4.5,rain
2,0.8,11.7,7.2,2.3,rain
3,20.3,12.2,5.6,4.7,rain
4,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...
1456,8.6,4.4,1.7,2.9,rain
1457,1.5,5.0,1.7,1.3,rain
1458,0.0,7.2,0.6,2.6,fog
1459,0.0,5.6,-1.0,3.4,sun


In [7]:
# (number of rows, number of columns) of the working frame
update_df.shape

(1461, 5)

In [8]:
# Summary statistics for the numeric columns
update_df.describe()

Unnamed: 0,precipitation,temp_max,temp_min,wind
count,1461.0,1461.0,1461.0,1461.0
mean,3.029432,16.439083,8.234771,3.241136
std,6.680194,7.349758,5.023004,1.437825
min,0.0,-1.6,-7.1,0.4
25%,0.0,10.6,4.4,2.2
50%,0.0,15.6,8.3,3.0
75%,2.8,22.2,12.2,4.0
max,55.9,35.6,18.3,9.5


In [9]:
# Column dtypes and non-null counts
update_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   precipitation  1461 non-null   float64
 1   temp_max       1461 non-null   float64
 2   temp_min       1461 non-null   float64
 3   wind           1461 non-null   float64
 4   weather        1461 non-null   object 
dtypes: float64(4), object(1)
memory usage: 57.2+ KB


In [10]:
# Count missing values per column
update_df.isna().sum()

precipitation    0
temp_max         0
temp_min         0
wind             0
weather          0
dtype: int64

In [11]:
# Number of distinct weather categories in the raw data
print(df['weather'].nunique())

5


In [12]:
# Encoder used to turn the weather strings into integer class codes
le = LabelEncoder()

In [13]:
#string entities must be converted into numeric data for processing
#converting weather column entities into appropriate numerical values
# Encode from the untouched df['weather'] column (not from update_df itself) so that
# re-running this cell always fits the encoder on the original strings and
# reproduces the same mapping.
update_df['weather'] = le.fit_transform(df['weather'])

#the numeric values respectively are as follows
print(dict(zip(le.classes_, range(len(le.classes_)))))

{'drizzle': 0, 'fog': 1, 'rain': 2, 'snow': 3, 'sun': 4}


In [14]:
# Inspect the frame after encoding the 'weather' column
update_df

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather
0,0.0,12.8,5.0,4.7,0
1,10.9,10.6,2.8,4.5,2
2,0.8,11.7,7.2,2.3,2
3,20.3,12.2,5.6,4.7,2
4,1.3,8.9,2.8,6.1,2
...,...,...,...,...,...
1456,8.6,4.4,1.7,2.9,2
1457,1.5,5.0,1.7,1.3,2
1458,0.0,7.2,0.6,2.6,1
1459,0.0,5.6,-1.0,3.4,4


In [15]:
# Number of distinct values in the encoded 'weather' column
print(update_df['weather'].nunique())

5


In [16]:
#separating the target 'y' (weather code) from the input features 'x'
y = update_df['weather']
x = update_df.drop(columns=['weather'])

In [17]:
# x=x=update_df.iloc[:,[0,1,2,3]].values #[row,[columns]]
# y=x=update_df.iloc[:,[4]].values #[row,[columns]]

In [18]:
# Feature columns used as model input
x

Unnamed: 0,precipitation,temp_max,temp_min,wind
0,0.0,12.8,5.0,4.7
1,10.9,10.6,2.8,4.5
2,0.8,11.7,7.2,2.3
3,20.3,12.2,5.6,4.7
4,1.3,8.9,2.8,6.1
...,...,...,...,...
1456,8.6,4.4,1.7,2.9
1457,1.5,5.0,1.7,1.3
1458,0.0,7.2,0.6,2.6
1459,0.0,5.6,-1.0,3.4


In [19]:
# Target column (encoded weather)
y

0       0
1       2
2       2
3       2
4       2
       ..
1456    2
1457    2
1458    1
1459    4
1460    4
Name: weather, Length: 1461, dtype: int32

In [20]:
#splitting the dataset into testing dataset and training dataset
# shuffle=False keeps the original row order, so the last 5% of rows form the test set.
# NOTE(review): random_state has no effect when shuffle=False — confirm whether a
# shuffled (or stratified) split was intended here.
X_train, X_test, y_train, y_test = train_test_split(x, y,shuffle=False, test_size=0.05, random_state=0)

In [21]:
# Rows used for training
X_train

Unnamed: 0,precipitation,temp_max,temp_min,wind
0,0.0,12.8,5.0,4.7
1,10.9,10.6,2.8,4.5
2,0.8,11.7,7.2,2.3
3,20.3,12.2,5.6,4.7
4,1.3,8.9,2.8,6.1
...,...,...,...,...
1382,0.0,15.0,10.0,5.0
1383,0.0,21.1,9.4,3.4
1384,0.0,20.0,8.9,1.3
1385,0.3,19.4,11.7,1.3


In [22]:
# Rows held out for testing
X_test

Unnamed: 0,precipitation,temp_max,temp_min,wind
1387,0.3,17.2,12.2,2.6
1388,0.0,17.8,10.6,1.8
1389,0.0,16.1,8.3,1.3
1390,0.0,16.1,8.9,2.7
1391,0.0,12.8,7.2,2.6
...,...,...,...,...
1456,8.6,4.4,1.7,2.9
1457,1.5,5.0,1.7,1.3
1458,0.0,7.2,0.6,2.6
1459,0.0,5.6,-1.0,3.4


In [23]:
# Names of the model's input columns; they must match the DataFrame column names
# exactly (the previous 'temp_max' entry carried a stray trailing tab character).
feature_names = ['precipitation', 'temp_max', 'temp_min', 'wind']

In [24]:
#in this model we are using RandomForestClassifier
# A forest of 100 trees; the fixed seed makes the fitted model repeatable
rf = RandomForestClassifier(random_state=42, n_estimators=100)

In [25]:
#training the model
# y_train is already a one-dimensional Series, so it is passed as-is;
# Series.ravel() is deprecated in recent pandas releases.
rf.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [26]:
#testing the model
# Predict the class codes for the held-out rows
y_pred = rf.predict(X_test)

In [27]:
#evaluating the model
print("Accuracy:", accuracy_score(y_test, y_pred))
# Rows of the matrix are the true labels, columns are the predicted labels
print("Confusion matrix:", confusion_matrix(y_test, y_pred))
# NOTE(review): the targets are label-encoded class codes, so this MSE measures the
# distance between arbitrary integer codes rather than a meaningful error —
# consider dropping it in favour of classification metrics.
print("MSE:", mean_squared_error(y_test, y_pred))

Accuracy: 0.8378378378378378
Confusion matrix: [[ 0  0  0  0  0]
 [ 2  1  0  0  9]
 [ 0  0 51  1  0]
 [ 0  0  0  0  0]
 [ 0  0  0  0 10]]
MSE: 1.135135135135135


In [28]:
#searching for optimal parameters
# Exhaustive 5-fold cross-validated search over forest size, tree depth and leaf size
params = {'n_estimators': [50, 100, 200], 'max_depth': [5, 10, None], 'min_samples_leaf': [1, 5, 10]}
grid_search = GridSearchCV(rf, params, cv=5)
# y_train is already one-dimensional; Series.ravel() is deprecated in recent
# pandas releases, so the Series is passed directly.
grid_search.fit(X_train, y_train)
print("Best parameters:", grid_search.best_params_)

Best parameters: {'max_depth': 5, 'min_samples_leaf': 10, 'n_estimators': 100}


In [29]:
#reusing the best model found by the grid search and checking it on the testing data
# GridSearchCV refits the best parameter combination on the whole training set
# (refit=True is the default), keeping rf's random_state. Taking best_estimator_
# avoids hand-copying the printed parameters, which would silently go stale if the
# search grid or the data changed, and avoids fitting the same model a second time.
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.8378378378378378


In [30]:
# Label codes for reference: {'drizzle': 0, 'fog': 1, 'rain': 2, 'snow': 3, 'sun': 4}

# One sample record, with values in the same order as the feature columns
precipitation, temp_max, temp_min, wind = 12.7, 12.2, -9.4, 3

# The model expects a 2-D input, hence the single row wrapped in an outer list
test = [[precipitation, temp_max, temp_min, wind]]
print(test)

[[12.7, 12.2, -9.4, 3]]


In [31]:
#creating a function to predict on a single record
def give_pred(test):
    """Predict the weather label for a single record using ``best_rf``.

    Parameters
    ----------
    test : list of list
        Exactly one row, ordered as
        ``[precipitation, temp_max, temp_min, wind]``.

    Returns
    -------
    str
        ``"Drizzle"``, ``"Fog"``, ``"Rain"``, ``"Snow"`` or ``"Sun"``;
        ``"Can't Predict"`` if the predicted code is not one of 0-4.

    Raises
    ------
    ValueError
        If ``test`` does not contain exactly one record.
    """
    # Display names keyed by class code:
    # {'drizzle': 0, 'fog': 1, 'rain': 2, 'snow': 3, 'sun': 4}
    labels = {0: "Drizzle", 1: "Fog", 2: "Rain", 3: "Snow", 4: "Sun"}

    prediction = best_rf.predict(test)

    # predict() returns one entry per input row. Comparing that whole array to a
    # scalar inside an `if` only works for a single row, so check it explicitly.
    if len(prediction) != 1:
        raise ValueError("give_pred expects exactly one record")

    return labels.get(int(prediction[0]), "Can't Predict")

In [32]:
#testing the prediction function
# Uses the sample record stored in `test`
print(give_pred(test))

Rain




In [None]:
import PySimpleGUI as sg

# Define the layout of the GUI
sg.theme('DarkBlack')

# The GUI passes plain lists to the model, so silence sklearn's
# "X does not have valid feature names" warning.
warnings.filterwarnings("ignore", message="X does not have valid feature names")

layout = [
    [sg.Text('Enter the values:')],
    [sg.Text('Precipitation (mm)')],
    [sg.InputText()],
    [sg.Text('Maximum Temperature (Degree Celcius)')],
    [sg.InputText()],
    [sg.Text('Minimum Temperature (Degree Celcius)')],
    [sg.InputText()],
    [sg.Text('Wind Speed (km/hr)')],
    [sg.InputText()],
    [sg.Button('Submit')],
    [sg.Text('Output:')],
    [sg.Output(size=(50, 5))]
]

# Create the GUI window
window = sg.Window('Weather Forecast Prediction', layout)

# Event loop. The try/finally guarantees the window is closed even if a
# prediction raises unexpectedly.
try:
    while True:
        event, values = window.read()
        if event == sg.WINDOW_CLOSED:
            break
        if event == 'Submit':
            try:
                # The input fields return strings; convert them to numbers before
                # predicting so that empty or non-numeric input is reported to the
                # user instead of crashing the loop.
                test = [[float(values[i]) for i in range(4)]]
            except (TypeError, ValueError):
                print("Please enter a numeric value in every field.")
                continue
            output = give_pred(test)
            print("The Weather Is : " + output)
            print("Have a Good Day..")
finally:
    # Close the window
    window.close()

: 