<a href="https://colab.research.google.com/github/nidhi-158/second-project/blob/main/Weather_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Column groups referenced by the encoding and scaling steps further down
numerical_features = [
    'Temperature (C)',
    'Apparent Temperature (C)',
    'Humidity',
    'Wind Speed (km/h)',
    'Wind Bearing (degrees)',
    'Visibility (km)',
    'Pressure (millibars)',
]
categorical_features = ['Summary', 'Precip Type']

# Read the raw CSV, discard the two columns that are not used as predictors,
# and keep only rows without any missing value.
# NOTE(review): 'Loud Cover' has to match the CSV header exactly; presumably
# that is the dataset's own spelling -- verify before "correcting" it.
data = (
    pd.read_csv("/content/weatherHistory.csv")
    .drop(columns=['Formatted Date', 'Loud Cover'])
    .dropna()
)

# One-hot encode the categorical predictors. drop_first=True keeps k-1
# indicator columns per feature, so the first level of each feature is
# represented implicitly by all of its indicators being zero.
data_encoded = pd.get_dummies(data, columns=categorical_features, drop_first=True)

# Swap the text target for integer class ids. The fitted encoder is kept so
# that predicted ids can be translated back to text later on.
label_encoder = LabelEncoder()
encoded_target = label_encoder.fit_transform(data_encoded['Daily Summary'])
data_encoded['Daily Summary'] = encoded_target

# Separate the target column from the predictor columns
y = data_encoded['Daily Summary']
X = data_encoded.drop(columns=['Daily Summary'])

# Split data into training and testing sets (80% / 20%, fixed seed so the
# split is the same on every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize numerical features. The scaler is fitted on the training rows only
# and merely applied to the test rows, so test-set statistics never influence
# the transformation the model is trained with.
scaler = StandardScaler()
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

# Initialize and train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out rows. Without this step the test split above is
# never used and there is no indication of how trustworthy predictions are.
test_accuracy = model.score(X_test, y_test)
print(f"Held-out test accuracy: {test_accuracy:.3f}")

# User input directly in the main script
def _ask_category(prompt, column):
    """Prompt for a categorical value and return it spelled as in the training data.

    The answer is stripped of surrounding whitespace and checked against the
    distinct values of ``data[column]``. An exact match is returned as typed;
    otherwise a single case-insensitive match is returned in the training
    data's spelling.

    Why validate: an unrecognised value is one-hot encoded into a column that
    the later ``reindex(columns=X.columns, ...)`` discards, so the model would
    silently predict for the baseline category instead of failing.

    Args:
        prompt: Text shown to the user.
        column: Name of the categorical column in ``data`` to validate against.

    Returns:
        The matching level from ``data[column]``.

    Raises:
        ValueError: If the answer matches no known level, or matches more than
            one level when compared case-insensitively.
    """
    known_levels = data[column].unique().tolist()
    answer = input(prompt).strip()
    if answer in known_levels:
        return answer
    folded = answer.casefold()
    close = [level for level in known_levels if str(level).casefold() == folded]
    if len(close) == 1:
        return close[0]
    raise ValueError(
        f"Unknown {column!r} value {answer!r}; expected one of: "
        f"{sorted(map(str, known_levels))}"
    )


summary = _ask_category("Enter Summary: ", 'Summary')
precip_type = _ask_category("Enter Precip Type: ", 'Precip Type')
temperature = float(input("Enter Temperature (C): "))
apparent_temperature = float(input("Enter Apparent Temperature (C): "))
humidity = float(input("Enter Humidity: "))
wind_speed = float(input("Enter Wind Speed (km/h): "))
# Parsed as float so that answers such as "255.0" are accepted; the value is
# passed through the scaler as a float anyway.
wind_bearing = float(input("Enter Wind Bearing (degrees): "))
visibility = float(input("Enter Visibility (km): "))
pressure = float(input("Enter Pressure (millibars): "))

# Gather the answers under the raw column names used by the training data
answers = {
    'Summary': summary,
    'Precip Type': precip_type,
    'Temperature (C)': temperature,
    'Apparent Temperature (C)': apparent_temperature,
    'Humidity': humidity,
    'Wind Speed (km/h)': wind_speed,
    'Wind Bearing (degrees)': wind_bearing,
    'Visibility (km)': visibility,
    'Pressure (millibars)': pressure,
}

# Wrap every value in a one-element list so the frame has exactly one row
user_data = {column: [value] for column, value in answers.items()}
new_data_df = pd.DataFrame(user_data)

# One-hot encode the two categorical answers, then force the column layout of
# the training matrix: indicator columns absent from this single row are
# added as 0, and columns the training matrix does not have are dropped.
new_data_encoded = (
    pd.get_dummies(new_data_df, columns=categorical_features)
    .reindex(columns=X.columns, fill_value=0)
)

# Apply the scaler that was fitted on the training rows
scaled_values = scaler.transform(new_data_encoded[numerical_features])
new_data_encoded[numerical_features] = scaled_values

# Predict the encoded class id for the single input row
predicted_summary = model.predict(new_data_encoded)

# Translate the class id back into the original 'Daily Summary' text
predicted_summary_decoded = label_encoder.inverse_transform(predicted_summary)

# Show the only prediction
first_prediction = predicted_summary_decoded[0]
print(f"Predicted Daily Summary: {first_prediction}")

Enter Summary: Partly Cloudy
Enter Precip Type: rain
Enter Temperature (C): 9.3
Enter Apparent Temperature (C): 7.3
Enter Humidity: 0.9
Enter Wind Speed (km/h): 14
Enter Wind Bearing (degrees): 255
Enter Visibility (km): 15
Enter Pressure (millibars): 1050
Predicted Daily Summary: Partly cloudy throughout the day.
