In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Read the weather observations from the CSV file in the working directory.
df = pd.read_csv('WeatherHistory.csv')

# Show the first rows of the three columns this analysis works with.
preview_columns = ['Temperature (C)', 'Apparent Temperature (C)', 'Humidity']
print("Dataset Preview:")
print(df.loc[:, preview_columns].head())

# 1. Pairwise correlation between humidity and the two temperature columns
#    (DataFrame.corr() is called with its default settings).
correlation_columns = ['Humidity', 'Temperature (C)', 'Apparent Temperature (C)']
print("\nCorrelation Matrix:")
print(df.loc[:, correlation_columns].corr())

# 2. Side-by-side scatter plots of humidity against each temperature column.
# Each entry describes one panel: (y-axis column, panel title, extra scatter
# keyword arguments). The first panel keeps matplotlib's default colour.
scatter_panels = [
    ('Temperature (C)', 'Humidity vs Temperature', {}),
    ('Apparent Temperature (C)', 'Humidity vs Apparent Temperature',
     {'color': 'orange'}),
]

plt.figure(figsize=(12, 5))

for panel_index, (y_column, panel_title, scatter_options) in enumerate(
        scatter_panels, start=1):
    # Panels sit in a single row of two; subplot positions are 1-based.
    plt.subplot(1, 2, panel_index)
    plt.scatter(df['Humidity'], df[y_column], alpha=0.3, **scatter_options)
    plt.title(panel_title)
    plt.xlabel('Humidity')
    plt.ylabel(y_column)
    plt.grid(True)

plt.tight_layout()
plt.show()

# 3. Predict Apparent Temperature from Humidity using Linear Regression
# Drop rows with a missing value in either column before fitting:
# LinearRegression rejects NaN input, and dropping from both columns at once
# keeps X and y aligned row-for-row for the metrics below.
regression_data = df[['Humidity', 'Apparent Temperature (C)']].dropna()
X = regression_data[['Humidity']]  # 2-D feature frame, as scikit-learn expects
y = regression_data['Apparent Temperature (C)']

model = LinearRegression()
model.fit(X, y)

# Predictions on the same rows the model was fitted on.
y_pred = model.predict(X)

# Plot regression line
# A fitted straight line is fully determined by two points, so it is drawn
# between the smallest and largest observed humidity instead of through every
# (unsorted) sample. Keeping the 'Humidity' column name matches the feature
# name the model was fitted with.
line_x = pd.DataFrame({'Humidity': [X['Humidity'].min(), X['Humidity'].max()]})
line_y = model.predict(line_x)

plt.scatter(X['Humidity'], y, alpha=0.3, label="Data")
plt.plot(line_x['Humidity'], line_y, color='red', label="Regression Line")
plt.title('Prediction: Apparent Temp vs Humidity')
plt.xlabel('Humidity')
plt.ylabel('Apparent Temperature (C)')
plt.legend()
plt.grid(True)
plt.show()

# Model evaluation
# NOTE: these metrics are computed on the data used for fitting (in-sample),
# so they describe goodness of fit, not performance on unseen data.
print(f"Intercept (b): {model.intercept_}")
print(f"Coefficient (m): {model.coef_[0]}")
print(f"Mean Squared Error (MSE): {mean_squared_error(y, y_pred)}")
print(f"R-squared Score: {r2_score(y, y_pred)}")

# Predict for a new value
example = pd.DataFrame({'Humidity': [0.85]})
prediction = model.predict(example)[0]
# The degree sign is written as an escape so the message cannot be garbled
# again if the file is re-saved with a different encoding.
print(f"\nPredicted Apparent Temperature for Humidity 0.85: {prediction:.2f} \u00b0C")
