In [None]:

# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the weather observations; the path is relative to the working directory
# the notebook is run from.
weather_csv_path = "austin_weather.csv"
df = pd.read_csv(weather_csv_path)

# Last expression of the cell: preview the first rows of the raw frame.
df.head()


In [None]:
# Column names, dtypes and non-null counts (info() prints directly).
df.info()

# Summary statistics of the numeric columns.
# NOTE(review): in a notebook only the last expression of a cell is rendered,
# so this table is computed but not shown; wrap it in display(...) or move it
# to its own cell if it should be visible.
df.describe()

# Missing-value count per column. DataFrame has no `isa` method; the
# missing-value mask is produced by `isna()`.
df.isna().sum()

In [None]:

# Columns excluded from the model; errors="ignore" below tolerates any of them
# already being absent.
columns_to_drop = ["Events", "Date", "SeaLevelPressureHighInches", "SeaLevelPressureLowInches"]

# Cleaning pipeline, applied in order:
#   1. drop the excluded columns
#   2. map the "T" (trace rainfall) marker to 0
#   3. map the "-" placeholder to NaN
#   4. coerce every column to a numeric dtype (unparseable values become NaN)
#   5. discard any row that still has a missing value
df = (
    df.drop(columns=columns_to_drop, errors="ignore")
    .replace("T", 0)
    .replace("-", np.nan)
    .apply(pd.to_numeric, errors="coerce")
    .dropna()
)

# Preview the cleaned frame.
df.head()


In [None]:

# The regression target is the precipitation column; every remaining column
# of the frame is used as a feature.
target_column = "PrecipitationSumInches"

# Feature matrix (X): the frame without the target column.
X = df.drop(columns=target_column)

# Target vector (y).
y = df[target_column]

# Show the first rows of both as the cell output.
X.head(), y.head()


In [None]:

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:


# Fit a linear regression on the training split; fit() returns the
# estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out test features.
y_pred = model.predict(X_test)


In [None]:

# Score the test-set predictions: mean squared error, its square root (RMSE)
# and the R-squared score.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
# \u00b2 is the superscript-two character; writing it as an escape keeps the
# label readable ("R² Score:") regardless of the file's encoding.
print("R\u00b2 Score:", r2)


In [None]:


# Line plot of the precipitation column against the frame's index.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df["PrecipitationSumInches"])
ax.set_title("Precipitation Trend Over Time")
ax.set_xlabel("Days")
ax.set_ylabel("Precipitation (Inches)")
plt.show()


In [None]:


# One scatter plot per feature, each against precipitation.
# Every entry is (feature column, figure title, x-axis label).
scatter_specs = [
    ("HumidityAvgPercent", "Humidity vs Precipitation", "Humidity (%)"),
    ("WindSpeedAvgMPH", "Wind Speed vs Precipitation", "Wind Speed (MPH)"),
]

for feature_column, plot_title, x_label in scatter_specs:
    # A fresh figure per feature so each plot is rendered separately.
    plt.figure(figsize=(10, 5))
    plt.scatter(df[feature_column], df["PrecipitationSumInches"])
    plt.title(plot_title)
    plt.xlabel(x_label)
    plt.ylabel("Precipitation (inches)")
    plt.show()


In [None]:


# Closing remarks for the notebook.
# NOTE(review): these statements are fixed text; nothing here is computed from
# the fitted model or the metrics above.
summary_lines = (
    "The Linear Regression model predicts precipitation levels based on weather attributes.",
    "Higher humidity generally increases precipitation likelihood.",
    "Wind speed has moderate relationship.",
    "The model's accuracy depends heavily on the dataset's variability.",
)

# A leading and a trailing newline surround the joined lines.
print("\n" + "\n".join(summary_lines) + "\n")
