In [1]:
import json
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
warnings.filterwarnings('ignore')

In [2]:
# The OpenWeatherMap key is read from the environment so the secret is never
# stored in (or committed with) the notebook. Export OPENWEATHER_API_KEY before
# starting the kernel; with an empty key the API rejects every request and
# get_current_weather() surfaces that through raise_for_status().
# NOTE(review): the key that used to be hardcoded here should be revoked.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")
# Root of the OpenWeatherMap v2.5 REST API; endpoint names are appended to it.
BASE_URL = "https://api.openweathermap.org/data/2.5/"

In [3]:
def get_current_weather(city):
  """Fetch the current conditions for ``city`` from the OpenWeatherMap API.

  The query is sent through ``params=`` so the city name is URL-encoded by
  ``requests``; a name containing ``&``, ``#`` or spaces can no longer alter
  the query string. A timeout keeps the notebook from hanging on a stalled
  connection.

  Args:
    city: City name to look up (sent as the ``q`` query parameter).

  Returns:
    dict with the keys ``city``, ``current_temp``, ``feel_like``, ``temp_min``,
    ``temp_max``, ``humidity``, ``descriptions``, ``country``,
    ``wind_gust_dir``, ``pressure`` and ``wind_gust_speed``. Temperatures and
    humidity are rounded to whole numbers. ``wind_gust_dir`` and
    ``wind_gust_speed`` carry the response's ``wind.deg`` and ``wind.speed``
    values and are ``None`` when the response has no such field.

  Raises:
    requests.HTTPError: the API answered with an error status.
    requests.Timeout: no response arrived within the timeout.
    KeyError: the response lacks the ``main`` or ``sys`` section.
  """
  response = requests.get(
      f"{BASE_URL}weather",
      params={'q': city, 'appid': API_KEY, 'units': 'metric'},
      timeout=10,
  )
  response.raise_for_status() # Raise an exception for bad status codes
  data = response.json()

  main = data['main']
  # The wind section is optional in the payload, hence the tolerant lookups.
  wind = data.get('wind', {})
  conditions = data.get('weather')

  return {
      'city': data.get('name'),
      'current_temp': round(main.get('temp')),
      'feel_like': round(main.get('feels_like')),
      'temp_min': round(main.get('temp_min')),
      'temp_max': round(main.get('temp_max')),
      'humidity': round(main.get('humidity')),
      'descriptions': conditions[0].get('description') if conditions else None,
      'country': data['sys'].get('country'),
      'wind_gust_dir': wind.get('deg'),
      'pressure': main.get('pressure'),
      'wind_gust_speed': wind.get('speed'),
  }

In [4]:
#from google.colab import files
#upload =files.upload()

In [5]:
def read_historical_data(path='weather.csv'):
    """Load the historical weather table and drop unusable rows.

    Args:
        path: CSV file to read. Defaults to ``'weather.csv'`` in the working
            directory, which is what the notebook has always used.

    Returns:
        pandas.DataFrame without rows that contain a missing value and without
        exact duplicate rows. The original row index is kept (not reset).
    """
    return pd.read_csv(path).dropna().drop_duplicates()

In [6]:
def prepare_data(df, encoder_column=None):
  """Label-encode the text columns and split features from the target.

  Works on a copy, so the caller's DataFrame keeps its raw labels. One
  ``LabelEncoder`` is fitted per object-dtype column.

  Args:
    df: Historical weather frame; must contain a ``RainTomorrow`` column.
    encoder_column: Name of the column whose fitted encoder should be
      returned. With the default ``None`` the encoder of the LAST encoded
      column is returned, which is what this function has always done.

  Returns:
    ``(x, y, lb)``: ``x`` is every column except ``RainTomorrow``, ``y`` is the
    ``RainTomorrow`` column, and ``lb`` is the selected ``LabelEncoder``.
    ``lb`` is ``None`` when no column needed encoding, or when
    ``encoder_column`` names a column that was not encoded.
  """
  encoded = df.copy()
  encoders = {}
  for col in encoded.select_dtypes(include=['object']).columns:
    encoder = LabelEncoder()
    encoded[col] = encoder.fit_transform(encoded[col])
    encoders[col] = encoder

  x = encoded.drop(columns=['RainTomorrow'])
  y = encoded['RainTomorrow']

  if encoder_column is not None:
    lb = encoders.get(encoder_column)
  elif encoders:
    # Backward-compatible default: the encoder fitted last in column order.
    lb = list(encoders.values())[-1]
  else:
    lb = None
  return x, y, lb

In [7]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

# Feature matrix and target built once from the cleaned CSV; the later
# `rain_model = train_model(x, y)` cell reuses these two names.
x,y,_ = prepare_data(read_historical_data())

# Compare hold-out fractions 0.20, 0.25, 0.30, 0.35 by test-set accuracy.
# Both the split and the forest are seeded so that the only thing varying
# between iterations is the test size and the printed result is reproducible.
# NOTE(review): picking a setting by its test-set score is optimistic; treat
# the number as exploratory, not as a generalisation estimate.
best_score =0
best_test_size=0
for ts in np.arange(0.2,0.4,0.05):
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=ts, random_state=42)
  model =RandomForestClassifier(random_state=42)
  model.fit(x_train,y_train)
  score=accuracy_score(y_test,model.predict(x_test))

  # Strict '>' keeps the first (smallest) test size on ties.
  if score >best_score:
    best_score =score
    best_test_size=ts
print(f"Best test_Size :{best_test_size}, Accuracy :{round(best_score, 4)}") # Rounded accuracy for better display

Best test_Size :0.35, Accuracy :0.8594


In [8]:
# Compare train/test split seeds 1..20 at a fixed 30% hold-out.
# The forest gets its own fixed seed so that score differences come from the
# split alone and the reported "best" seed is the same on every run.
# NOTE(review): choosing the split seed by its test accuracy cherry-picks a
# favourable split; the reported accuracy is therefore optimistic.
best_score = 0
best_seed = 0

for seed in range(1, 21):  # Try random_state from 1 to 20
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=seed)
    model = RandomForestClassifier(random_state=42)
    model.fit(x_train, y_train)
    score = accuracy_score(y_test, model.predict(x_test))

    # Strict '>' keeps the lowest seed on ties.
    if score > best_score:
        best_score = score
        best_seed = seed

print(f"Best random_state: {best_seed}, Accuracy: {round(best_score * 100, 2)}%")


Best random_state: 17, Accuracy: 88.99%


In [9]:
def train_model(x,y,test_size=0.3,random_state=9):
  """Train the rain classifier and print its hold-out evaluation.

  Args:
    x: Feature matrix.
    y: Target labels.
    test_size: Fraction of rows held out for evaluation (default 0.3).
    random_state: Seed used for BOTH the train/test split and the forest, so
      repeated calls give the same model and the same printed metrics
      (default 9, the split seed this notebook has always used).

  Returns:
    The fitted ``RandomForestClassifier`` (100 trees), trained on the training
    part of the split only.
  """
  x_train, x_test, y_train, y_test = train_test_split(
      x, y, test_size=test_size, random_state=random_state)
  model =RandomForestClassifier(n_estimators=100, random_state=random_state)
  model.fit(x_train,y_train)

  ypred =model.predict(x_test)
  print("accuracy_Score",accuracy_score(y_test,ypred))
  print("confusion_matrix",confusion_matrix(y_test,ypred))
  print("Classification_report",classification_report(y_test,ypred))
  return model

In [10]:
def prepare_regressor(df,feature):
  """Build (current value -> next value) training pairs for one column.

  Row ``i`` of ``x`` holds the value at position ``i`` and ``y[i]`` the value
  at position ``i + 1``. Pairing is positional, so the frame's index labels do
  not matter. Slicing replaces the former per-row ``.iloc`` loop.

  Args:
    df: Frame containing the column ``feature``.
    feature: Name of the column to turn into a lag-1 dataset.

  Returns:
    ``(x, y)``: ``x`` has shape ``(len(df) - 1, 1)`` and ``y`` has shape
    ``(len(df) - 1,)``; both are empty when ``df`` has fewer than two rows.
  """
  values = np.array(df[feature])
  # Separate copies so x and y never alias each other or the DataFrame.
  x = np.array(values[:-1]).reshape(-1, 1)
  y = np.array(values[1:])
  return x, y

In [11]:
def regressor_model(x_train,y_train):
  """Fit a 100-tree random-forest regressor (seed 42) and return it.

  Args:
    x_train: Training features.
    y_train: Training targets.

  Returns:
    The fitted ``RandomForestRegressor``.
  """
  forest = RandomForestRegressor(random_state=42, n_estimators=100)
  # fit() returns the estimator itself, so the fitted forest is handed back.
  return forest.fit(x_train, y_train)

In [12]:
def predict_future(model,current_value,step=5):
  """Roll a one-step model forward, feeding each prediction back in.

  Args:
    model: Object with a ``predict`` method that accepts a ``(1, 1)`` array
      and returns a sequence whose first element is the next value.
    current_value: Starting value for the first prediction.
    step: Number of successive predictions to make (default 5).

  Returns:
    List of ``step`` predicted values, in order.
  """
  forecasts = []
  latest = current_value
  for _ in range(step):
    # predict() expects a 2-D input: one sample with one feature.
    features = np.array([latest]).reshape(-1, 1)
    latest = model.predict(features)[0]
    forecasts.append(latest)
  return forecasts

In [13]:
from datetime import datetime,timedelta
import pytz

# 16-point compass rose as (label, start_deg, end_deg) half-open sectors.
# "N" appears twice because its sector straddles 0°/360°.
_COMPASS_SECTORS = [
    ("N", 0, 11.25), ("NNE", 11.25, 33.75), ("NE", 33.75, 56.25),
    ("ENE", 56.25, 78.75), ("E", 78.75, 101.25), ("ESE", 101.25, 123.75),
    ("SE", 123.75, 146.25), ("SSE", 146.25, 168.75), ("S", 168.75, 191.25),
    ("SSW", 191.25, 213.75), ("SW", 213.75, 236.25), ("WSW", 236.25, 258.75),
    ("W", 258.75, 281.25), ("WNW", 281.25, 303.75), ("NW", 303.75, 326.25),
    ("NNW", 326.25, 348.75), ("N", 348.75, 360),
]


def _compass_direction(degrees):
  """Return the compass label for a bearing in [0, 360), or "unknown" if it is None."""
  if degrees is None:
    return "unknown"
  return next(
      (label for label, start, end in _COMPASS_SECTORS if start <= degrees < end),
      "unknown",
  )


def view_weather():
  """Prompt for a city, then print its current weather and short-term forecasts.

  Steps: fetch the current conditions, predict "rain tomorrow" with the
  module-level ``rain_model`` classifier, fit lag-1 regressors on the
  historical ``Temp`` and ``Humidity`` columns, and print five successive
  predictions of each.

  ``rain_model`` is read as a global, so the cell that assigns it
  (``rain_model = train_model(x, y)``) must have run before this is called.

  Raises:
    Whatever ``get_current_weather`` raises for an unknown city or a network
    failure, and ``NameError`` if ``rain_model`` has not been created yet.
  """
  city =input("Enter the city name:  ")
  current_weather =get_current_weather(city)
  historical_data =read_historical_data()

  # Fit a dedicated encoder on the raw WindGustDir labels. The encoder handed
  # back by prepare_data() belongs to whichever text column was encoded last,
  # which need not be WindGustDir; using it made every wind direction fall
  # through to the -1 fallback. LabelEncoder assigns codes from the sorted
  # unique labels, so fitting on the same cleaned column reproduces the codes
  # the classifier was trained with.
  wind_dir_encoder = LabelEncoder().fit(historical_data['WindGustDir'])

  raw_deg = current_weather['wind_gust_dir']
  win_deg = raw_deg % 360 if raw_deg is not None else None # Handle None case
  compass_dir = _compass_direction(win_deg)

  # -1 marks a direction that is missing or absent from the training data.
  if compass_dir in wind_dir_encoder.classes_:
    compass_dir_encoded = wind_dir_encoder.transform([compass_dir])[0]
  else:
    compass_dir_encoded = -1

  # Keys mirror the column names used for the classifier's input row.
  # NOTE(review): the API reports wind speed in m/s; confirm the CSV's
  # WindGustSpeed column uses the same unit.
  current_data = {
        'MinTemp': current_weather['temp_min'],
        'MaxTemp': current_weather['temp_max'],
        'WindGustDir': compass_dir_encoded,
        'WindGustSpeed': current_weather['wind_gust_speed'],
        'Humidity': current_weather['humidity'],
        'Pressure': current_weather['pressure'],
        'Temp': current_weather['current_temp'],
    }

  current_df=pd.DataFrame([current_data])
  rain_prediction=rain_model.predict(current_df)

  x_temp,y_temp=prepare_regressor(historical_data,'Temp')
  x_hum,y_hum=prepare_regressor(historical_data,'Humidity')

  # regressor_model() returns an already-fitted model.
  temp_model =regressor_model(x_temp,y_temp)
  hum_model = regressor_model(x_hum, y_hum)

  # Seed the temperature roll-out with the CURRENT temperature: the regressor
  # is trained on the 'Temp' column, not on daily minima.
  future_temp = predict_future(temp_model, current_weather['current_temp'])
  future_humidity = predict_future(hum_model, current_weather['humidity'])

  # Forecast labels are the next five whole hours, expressed in UTC.
  timezone = pytz.timezone('UTC')
  now = datetime.now(timezone)
  next_hour = (now + timedelta(hours=1)).replace(minute=0, second=0, microsecond=0)
  future_times = [(next_hour + timedelta(hours=i)).strftime('%H:00') for i in range(5)]

  print("\n--- Current Weather ---")
  print(f"City: {current_weather['city']}")
  print(f"Country: {current_weather['country']}")
  print(f"Description: {current_weather['descriptions']}")
  print(f"Temperature: {current_weather['current_temp']}°C")
  print(f"Feels Like: {current_weather['feel_like']}°C")
  print(f"Min Temp: {current_weather['temp_min']}°C")
  print(f"Max Temp: {current_weather['temp_max']}°C")
  print(f"Humidity: {current_weather['humidity']}%")
  print(f"Pressure: {current_weather['pressure']} hPa")
  print(f"Wind Gust Direction: {compass_dir} ({win_deg}°)")
  print(f"Wind Gust Speed: {current_weather['wind_gust_speed']} m/s")
  print(f"Rain Tomorrow Prediction: {'Yes' if rain_prediction[0] == 1 else 'No'}")

  print("\n--- Future Predictions (Next 5 Hours) ---")
  for i in range(5):
      print(f"{future_times[i]}: Temperature: {round(future_temp[i])}°C, Humidity: {round(future_humidity[i])}%")

In [15]:
# Train the rain classifier on the prepared features/target and keep it as a
# module-level name: view_weather() reads `rain_model` as a global.
rain_model = train_model(x, y)

accuracy_Score 0.8990825688073395
confusion_matrix [[85  4]
 [ 7 13]]
Classification_report               precision    recall  f1-score   support

           0       0.92      0.96      0.94        89
           1       0.76      0.65      0.70        20

    accuracy                           0.90       109
   macro avg       0.84      0.80      0.82       109
weighted avg       0.89      0.90      0.90       109



In [16]:
# Interactive entry point: prompts for a city name on stdin, then prints the
# current conditions, the rain-tomorrow prediction and the five-step forecasts.
# `rain_model` must already exist, because view_weather() reads it as a global.
view_weather()


--- Current Weather ---
City: Virār
Country: IN
Description: clear sky
Temperature: 24°C
Feels Like: 24°C
Min Temp: 24°C
Max Temp: 24°C
Humidity: 74%
Pressure: 1011 hPa
Wind Gust Direction: NNE (21°)
Wind Gust Speed: 0.36 m/s
Rain Tomorrow Prediction: Yes

--- Future Predictions (Next 5 Hours) ---
02:00: Temperature: 26°C, Humidity: 45%
03:00: Temperature: 22°C, Humidity: 46%
04:00: Temperature: 23°C, Humidity: 48%
05:00: Temperature: 24°C, Humidity: 54%
06:00: Temperature: 23°C, Humidity: 58%


In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import pickle

# Load dataset and drop rows with missing values or exact duplicates
df = pd.read_csv("weather.csv")
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

# Encode categorical columns (one fresh LabelEncoder per text column).
# NOTE(review): only the model is pickled below; the fitted encoders are not
# saved, so whoever loads model.pkl must re-create the same label encoding.
categorical = df.select_dtypes(include=['object']).columns
for col in categorical:
    lb = LabelEncoder()
    df[col] = lb.fit_transform(df[col])

# Split data
X = df.drop(columns=['RainTomorrow'])
y = df['RainTomorrow']

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9)

# Train model (seeded so the exported artefact is reproducible)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

# Save model; the context manager guarantees the file is flushed and closed
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
print("✅ Model saved as model.pkl")


✅ Model saved as model.pkl
