In [1]:
import http.client
import json
import csv
from datetime import datetime, timedelta
 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
 
 
 
 
import xgboost as xgb
from sklearn.metrics import mean_absolute_error

In [25]:
def fetch_weather_data(city):
    # Get today's date and the date 7 days ago
    today = datetime.today()
    seven_days_ago = today - timedelta(days=6)
   
    # Format dates to strings
    today_str = today.strftime('%Y-%m-%d')
    seven_days_ago_str = seven_days_ago.strftime('%Y-%m-%d')
 
    # API Connection to RapidAPI Weather API
    conn = http.client.HTTPSConnection("weatherapi-com.p.rapidapi.com")
 
    headers = {
           'x-rapidapi-key': "4765efd0e4msh0e6f4310f441125p115acdjsnc5b407e62f78",
    'x-rapidapi-host': "weatherapi-com.p.rapidapi.com"
    }
 
    # API URL to fetch historical weather data
    url = f"/history.json?q={city}&lang=en&dt={seven_days_ago_str}&end_dt={today_str}"
 
    # Make the API request
    conn.request("GET", url, headers=headers)
 
    # Get the response
    res = conn.getresponse()
    data = res.read()
   
 
    # Parse the JSON response
 
    weather_data = json.loads(data.decode("utf-8"))
   
    return weather_data

In [3]:
def convert_to_csv(weather_data, city):
    header = ["Date", "AvgTemp", "Humidity", "WindSpeed", "Precipitation", "Condition"]
    rows = []
 
    # Loop through the forecast data and add each day's weather information
    for day in weather_data['forecast']['forecastday']:
        rows.append([
            day['date'],  # Date
            day['day']['avgtemp_c'],  # Average Temperature (Celsius)
            day['day']['avghumidity'],  # Average Humidity
            day['day']['maxwind_kph'],  # Max Wind Speed (kph)
            day['day']['totalprecip_mm'],  # Precipitation (mm)
            day['day']['condition']['text']  # Weather Condition (text)
        ])
   
    # Write the data to a CSV file using the csv module
    csv_filename = f"{city}_weather_data.csv"
    with open(csv_filename, mode="w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(header)  # Write the header row
        writer.writerows(rows)  # Write the weather data
 
    print(f"CSV file '{csv_filename}' created successfully.")
 
 

In [6]:
city='Colombo'
# weatherdata = fetch_weather_data(city)
convert_to_csv(weatherdata,city)

CSV file 'Colombo_weather_data.csv' created successfully.


In [27]:
def preprocess_data(csv_filename):
     # Load the CSV data into a DataFrame
    df = pd.read_csv(csv_filename)
 
    # Ensure the 'Date' column is in datetime format (if you need to keep it)
    df['Date'] = pd.to_datetime(df['Date'])
 
    # Drop the 'Date' column as it's not useful for prediction
    df = df.drop(columns=["Date"])
 
    # One-hot encode the 'Condition' column (convert categorical to numeric)
    encoder = OneHotEncoder(sparse_output=False,handle_unknown='ignore')
    condition_encoded = encoder.fit_transform(df[['Condition']])
    condition_df = pd.DataFrame(condition_encoded, columns=encoder.get_feature_names_out(['Condition']))
 
    # Concatenate the one-hot encoded columns back into the original dataframe (without 'Condition' column)
    df = pd.concat([df.drop(columns=['Condition']), condition_df], axis=1)
 
    # Handle missing values (simple imputation strategy)
    imputer = SimpleImputer(strategy="mean")
    df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
    
    # Feature columns (excluding the target column 'AvgTemp')
    X = df.drop(columns=["AvgTemp"])
    print(X)
 
    # Target column (assuming you're predicting 'AvgTemp')
    y = df["AvgTemp"]
    X = X.values
 
    # Normalize the features (optional)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
 
    print("Shape of X_scaled:", X_scaled.shape)
    return X_scaled, y,scaler,encoder
 
 
def split_data(X, y):
    # Split the data into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print('x train in split',X_train)
    return X_train, X_test, y_train, y_test
 
 
 
def train_model(X_train, y_train):
    # Initialize the XGBoost model
    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100, random_state=42)
    print("Shape of X_train:", X_train.shape)
   
    # Train the model
    model.fit(X_train, y_train)
   
    return model
 
 
def evaluate_model(model, X_test, y_test):
    # Make predictions on the test set
    y_pred = model.predict(X_test)
 
    # Evaluate the model using Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Mean Absolute Error: {mae:.2f}")
 
    # You can also print predicted vs actual values if needed
    comparison = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
    print(comparison.head())
 
 
def predict_next_day_temperature(model, input_data):
    # Make a prediction for the next day's temperature
    predicted_temp = model.predict(input_data)
    print(f"Predicted temperature for next day: {predicted_temp[0]:.2f}°C")
 
 

In [28]:
X_scaled, y,scaler,encoder = preprocess_data(f"{city}_weather_data.csv")
X_train, X_test, y_train, y_test = split_data(X_scaled, y)
print('xtrain',X_train)
model = train_model(X_train, y_train)
evaluate_model(model, X_test, y_test)
input_data = [[26.4, 81, 22, 0.05, 6]]  # Example input data for the model (replace with your own)
predict_next_day_temperature(model, input_data)

   Humidity  WindSpeed  Precipitation  Condition_Light rain shower  \
0      78.0       14.8           0.00                          0.0   
1      76.0       14.8           0.00                          0.0   
2      77.0       15.8           0.00                          0.0   
3      77.0       16.2           0.71                          0.0   
4      83.0       14.8           9.14                          1.0   
5      79.0       26.3           2.24                          0.0   
6      81.0       21.6           8.19                          1.0   

   Condition_Patchy light drizzle  Condition_Patchy rain possible  \
0                             0.0                             0.0   
1                             0.0                             0.0   
2                             0.0                             0.0   
3                             0.0                             1.0   
4                             0.0                             0.0   
5                        

ValueError: Feature shape mismatch, expected: 7, got 5