In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# --- Load -------------------------------------------------------------------
# Read the raw observations from the CSV file in the working directory.
data = pd.read_csv('weather_data.csv')

# --- Encode the label --------------------------------------------------------
# Replace each weather name with an integer code. Series.map turns any label
# that is not a key of this dict into NaN.
weather_mapping = {
    'sun': 0,
    'drizzle': 1,
    'rain': 2,
    'snow': 3,
}
data['weather'] = data['weather'].map(weather_mapping)

# --- Drop incomplete rows ----------------------------------------------------
# Removes every row with a NaN in any column, which includes rows whose
# weather label was not in weather_mapping.
data = data.dropna()

# --- Features / target -------------------------------------------------------
features = ['precipitation', 'temp_max', 'temp_min', 'wind']
target = 'weather'
X, y = data[features], data[target]

# --- Train / test split ------------------------------------------------------
# 80 % train, 20 % test; fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Fit ---------------------------------------------------------------------
# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# --- Evaluate on the held-out rows --------------------------------------------
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics, one per line.
print(f'Mean Squared Error: {mse}', f'R-squared: {r2}', sep='\n')

# Predict the weather label for a single observation using the fitted model
def predict_weather(precipitation: float, temp_max: float, temp_min: float, wind: float) -> str:
    """Return the weather label whose numeric code is nearest the model output.

    The module-level ``model`` produces a continuous value; this function maps
    it back to a label by picking the key of ``weather_mapping`` whose code has
    the smallest absolute distance to that value. On a tie, the key that
    appears first in ``weather_mapping`` wins.

    Args:
        precipitation: Value for the ``precipitation`` feature.
        temp_max: Value for the ``temp_max`` feature.
        temp_min: Value for the ``temp_min`` feature.
        wind: Value for the ``wind`` feature.

    Returns:
        One of the keys of ``weather_mapping``.
    """
    # The model was fitted on a DataFrame with named columns. Passing a bare
    # list makes scikit-learn warn "X does not have valid feature names" and
    # skips its column consistency check, so label the single row with the
    # same feature list that was used for training.
    input_data = pd.DataFrame(
        [[precipitation, temp_max, temp_min, wind]],
        columns=features,
    )
    prediction = model.predict(input_data)[0]
    # Find the closest weather type
    closest_weather = min(weather_mapping, key=lambda k: abs(weather_mapping[k] - prediction))
    return closest_weather

# Example: run one prediction on a fixed set of sample inputs.
default_precipitation, default_temp_max, default_temp_min, default_wind = 0.1, 25, 15, 5

# Arguments are passed by keyword so each value is visibly tied to its feature.
predicted_weather = predict_weather(
    precipitation=default_precipitation,
    temp_max=default_temp_max,
    temp_min=default_temp_min,
    wind=default_wind,
)
print(f'Predicted Weather: {predicted_weather}')


Mean Squared Error: 0.5583273847560593
R-squared: 0.4508688401979144
Predicted Weather: drizzle


