In [1]:
import numpy as np
import pandas as pd
import requests

In [2]:
df = pd.read_csv("weather_forecast_data.csv")
df.head()

Unnamed: 0,Temperature,Humidity,Wind_Speed,Cloud_Cover,Pressure,Rain
0,23.720338,89.592641,7.335604,50.501694,1032.378759,rain
1,27.879734,46.489704,5.952484,4.990053,992.61419,no rain
2,25.069084,83.072843,1.371992,14.855784,1007.23162,no rain
3,23.62208,74.367758,7.050551,67.255282,982.632013,rain
4,20.59137,96.858822,4.643921,47.676444,980.825142,no rain


In [3]:
df.shape

(2500, 6)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2500 entries, 0 to 2499
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  2500 non-null   float64
 1   Humidity     2500 non-null   float64
 2   Wind_Speed   2500 non-null   float64
 3   Cloud_Cover  2500 non-null   float64
 4   Pressure     2500 non-null   float64
 5   Rain         2500 non-null   object 
dtypes: float64(5), object(1)
memory usage: 117.3+ KB


In [5]:
df.describe()

Unnamed: 0,Temperature,Humidity,Wind_Speed,Cloud_Cover,Pressure
count,2500.0,2500.0,2500.0,2500.0,2500.0
mean,22.581725,64.347094,9.906255,49.658104,1014.312336
std,7.326996,19.954739,5.780316,29.123104,20.196433
min,10.001842,30.005071,0.009819,0.015038,980.014486
25%,16.359398,47.339815,4.761909,23.900016,996.93863
50%,22.536448,63.920797,9.908572,49.488284,1013.433035
75%,28.976476,81.561021,14.948408,75.32414,1031.735067
max,34.995214,99.997481,19.999132,99.997795,1049.985593


In [6]:
df.isnull().sum()

Temperature    0
Humidity       0
Wind_Speed     0
Cloud_Cover    0
Pressure       0
Rain           0
dtype: int64

In [7]:
df.head()

Unnamed: 0,Temperature,Humidity,Wind_Speed,Cloud_Cover,Pressure,Rain
0,23.720338,89.592641,7.335604,50.501694,1032.378759,rain
1,27.879734,46.489704,5.952484,4.990053,992.61419,no rain
2,25.069084,83.072843,1.371992,14.855784,1007.23162,no rain
3,23.62208,74.367758,7.050551,67.255282,982.632013,rain
4,20.59137,96.858822,4.643921,47.676444,980.825142,no rain


In [8]:
API_KEY = "fedb3649a8c9a97c44826476b4ecdd6d"

In [9]:
def get_current_weather(city):
    URL = f"https://api.openweathermap.org/data/2.5/weather?q={city}&appid={API_KEY}"
    response =requests.get(URL)
    data = response.json()
    return {
        "city":data["name"],
        "Temperature":[round(data["main"]["temp"]//10)],
        "Humidity":[data["main"]["humidity"]],
        "Wind_Speed":[data["wind"]["speed"] * 10],
        "Cloud_Cover":[data["clouds"]["all"]],
        "Pressure":[data["main"]["pressure"]]
    }

In [10]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df["Rain"] = le.fit_transform(df["Rain"])

In [11]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix

In [13]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
rf = RandomForestClassifier()
rf.fit(X_train,y_train)
y_pred_rf = rf.predict(X_test)
accuracy_score(y_test,y_pred_rf)

0.998

In [14]:
confusion_matrix(y_test,y_pred_rf)

array([[443,   0],
       [  1,  56]], dtype=int64)

In [15]:
from sklearn.ensemble import GradientBoostingClassifier
gb = GradientBoostingClassifier()
gb.fit(X_train,y_train)
y_pred_gb = gb.predict(X_test)
accuracy_score(y_test,y_pred_gb)

1.0

In [16]:
confusion_matrix(y_test,y_pred_gb)

array([[443,   0],
       [  0,  57]], dtype=int64)

In [17]:
from sklearn.linear_model import LogisticRegression
lg =  LogisticRegression(random_state=48)
lg.fit(X_train,y_train)
y_pred_lg = lg.predict(X_test)
accuracy_score(y_test,y_pred_lg)

0.932

In [18]:
confusion_matrix(y_test,y_pred_lg)

array([[429,  14],
       [ 20,  37]], dtype=int64)

In [19]:
current_weather = get_current_weather("rajasthan")

In [20]:
current_weather

{'city': 'Rajasthan',
 'Temperature': [30],
 'Humidity': [18],
 'Wind_Speed': [32.599999999999994],
 'Cloud_Cover': [0],
 'Pressure': [1012]}

In [21]:
current_weather_df= pd.DataFrame(current_weather)

In [22]:
weather_df = current_weather_df[["Temperature","Humidity","Wind_Speed","Cloud_Cover","Pressure"]]

In [23]:
current_weather_df["city"][0]

'Rajasthan'

In [24]:
weather_df

Unnamed: 0,Temperature,Humidity,Wind_Speed,Cloud_Cover,Pressure
0,30,18,32.6,0,1012


In [25]:
lg.predict_proba(weather_df)

array([[9.99999984e-01, 1.62114142e-08]])

In [26]:
lg.predict(weather_df)

array([0])

In [28]:
import pickle 
pickle.dump(lg,open("model.pkl","wb"))