Fetching data from openWeather api weather

In [None]:
import requests

# New API key from the updated base URL
api_key = 'e80244e0c610f8310e79ba9af659cba0'
# City name and country code embedded in the base URL
city_name = 'Islamabad'
state_code='PK-IS'
country_code='+92'
# Updated base URL for the OpenWeatherMap API with embedded city name and API key
base_url = f'https://api.openweathermap.org/data/2.5/weather?q={city_name},{state_code},{country_code}&appid={api_key}&units=metric'

# Make the request to the OpenWeatherMap API
response = requests.get(base_url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON data
    data = response.json()
    # Print the current weather information
    print(f"City: {data['name']}")
    print(f"Temperature: {data['main']['temp']}°C")
    print(f"Weather: {data['weather'][0]['description']}")
    print(f"Humidity: {data['main']['humidity']}%")
    print(f"Wind Speed: {data['wind']['speed']} m/s")
else:
    # Print the error message if the request was not successful
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")
    print("Response content:", response.text)





Comparing results of my model and openweather

In [None]:
import pandas as pd
import h5py
import numpy as np
import requests
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Load dataset
df = pd.read_csv('/content/GlobalWeatherRepository.csv')

city_name='Islamabad'
# Filter rows where location_name is Islamabad
df_islamabad = df[df['location_name'] == city_name]

# Define the features and target
X = df_islamabad.drop(columns=['temperature_celsius', 'temperature_fahrenheit'])
y = df_islamabad['temperature_celsius']

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['number']).columns

# Define pipelines
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))
])

categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, numerical_cols),
        ('cat', categorical_pipeline, categorical_cols)
    ])

# Apply preprocessing
X_preprocessed = preprocessor.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Save the scaler and model
joblib.dump(preprocessor, 'preprocessor.pkl')
joblib.dump(model, 'model.pkl')

# Make predictions
predictions = model.predict(X_test)

# Save predictions to a .h5 file
with h5py.File('predictions.h5', 'w') as hf:
    hf.create_dataset('temperature_predictions', data=predictions)

api_key = 'e80244e0c610f8310e79ba9af659cba0'
state_code = 'PK'
coutry_code = '+92'
base_url = f'https://api.openweathermap.org/data/2.5/weather?q={city_name},{state_code},{coutry_code}&appid={api_key}&units=metric'

response = requests.get(base_url)
if response.status_code == 200:
    data = response.json()
    real_time_temperature = data['main']['temp']
else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")
    real_time_temperature = None

# Compare predictions with real-time temperature
if real_time_temperature is not None:
    print(f"Real-time temperature: {real_time_temperature}°C")
    print(f"Model predictions: {predictions[:5]}")  # Displaying the first 5 predictions
    for i, pred in enumerate(predictions[:5]):
        print(f"Prediction {i+1}: {pred}, Difference: {abs(pred - real_time_temperature)}")


Predicting Temperature from Personal Model inputs given live from openWeatherTemperature API based on location

In [7]:
import pandas as pd
import h5py
import numpy as np
import requests
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Load dataset
df = pd.read_csv('/content/GlobalWeatherRepository.csv')

city_name = 'Islamabad'
# Filter rows where location_name is Islamabad
df_islamabad = df[df['location_name'] == city_name]

# Convert wind speed from mph to m/s
df_islamabad['wind_mps'] = df_islamabad['wind_mph'] * 0.44704

# Define the features and target
X = df_islamabad[['location_name', 'condition_text', 'humidity', 'wind_mps']]
y = df_islamabad['temperature_celsius']

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['number']).columns

# Define pipelines
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))
])

categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, numerical_cols),
        ('cat', categorical_pipeline, categorical_cols)
    ])

# Apply preprocessing
X_preprocessed = preprocessor.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Save the preprocessor and model
joblib.dump(preprocessor, 'preprocessor.pkl')
joblib.dump(model, 'model.pkl')

# Make predictions on the test set
predictions = model.predict(X_test)

# Save predictions to a .h5 file
with h5py.File('predictions.h5', 'w') as hf:
    hf.create_dataset('temperature_predictions', data=predictions)

# Fetch real-time weather data from OpenWeatherMap API
api_key = 'e80244e0c610f8310e79ba9af659cba0'
base_url = f'https://api.openweathermap.org/data/2.5/weather?q={city_name},PK&appid={api_key}&units=metric'

response = requests.get(base_url)
if response.status_code == 200:
    data = response.json()
    real_time_temperature = data['main']['temp']
    real_time_humidity = data['main']['humidity']
    real_time_wind_mps = data['wind']['speed']
    real_time_condition = data['weather'][0]['description']
else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")
    real_time_temperature = None

# Prepare real-time data for prediction
if real_time_temperature is not None:
    real_time_data = pd.DataFrame({
        'location_name': [city_name],
        'condition_text': [real_time_condition],
        'humidity': [real_time_humidity],
        'wind_mps': [real_time_wind_mps]
    })

    # Preprocess the real-time data
    preprocessor = joblib.load('preprocessor.pkl')
    real_time_data_preprocessed = preprocessor.transform(real_time_data)

    # Predict temperature using the trained model
    model = joblib.load('model.pkl')
    predicted_temperature = model.predict(real_time_data_preprocessed)

    # Compare predictions with real-time temperature
    print(f"Real-time temperature (from API): {real_time_temperature}°C")
    print(f"Predicted temperature (from model): {predicted_temperature[0]}°C")
    print(f"Difference: {abs(predicted_temperature[0] - real_time_temperature)}°C")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_islamabad['wind_mps'] = df_islamabad['wind_mph'] * 0.44704


Real-time temperature (from API): 33.24°C
Predicted temperature (from model): 30.2388141894805°C
Difference: 3.0011858105195017°C


In [8]:
df_islamabad

Unnamed: 0,country,location_name,latitude,longitude,timezone,last_updated_epoch,last_updated,temperature_celsius,temperature_fahrenheit,condition_text,...,air_quality_PM10,air_quality_us-epa-index,air_quality_gb-defra-index,sunrise,sunset,moonrise,moonset,moon_phase,moon_illumination,wind_mps
131,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1715849100,2024-05-16 13:45,38.2,100.8,Sunny,...,31.0,2,2,05:06 AM,07:02 PM,12:27 PM,01:24 AM,Waxing Gibbous,55,3.308096
326,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1715868000,2024-05-16 19:00,33.0,91.4,Sunny,...,46.0,2,3,05:06 AM,07:02 PM,12:27 PM,01:24 AM,Waxing Gibbous,55,3.218688
520,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1715961600,2024-05-17 21:00,31.3,88.4,Clear,...,62.3,2,3,05:05 AM,07:03 PM,01:23 PM,01:50 AM,Waxing Gibbous,64,3.084576
713,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1716042600,2024-05-18 19:30,32.4,90.4,Sunny,...,28.5,1,2,05:04 AM,07:04 PM,02:18 PM,02:13 AM,Waxing Gibbous,73,3.710432
907,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1716128100,2024-05-19 19:15,32.1,89.8,Sunny,...,35.6,2,2,05:04 AM,07:04 PM,03:13 PM,02:36 AM,Waxing Gibbous,81,1.788160
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19699,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1724674500,2024-08-26 17:15,34.3,93.8,Sunny,...,60.7,3,5,05:37 AM,06:40 PM,10:53 PM,12:58 PM,Last Quarter,54,1.296416
19894,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1724760900,2024-08-27 17:15,26.1,79.0,Sunny,...,71.3,3,8,05:38 AM,06:39 PM,11:43 PM,02:05 PM,Waning Crescent,43,3.710432
20089,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1724848200,2024-08-28 17:30,28.3,82.9,Cloudy,...,46.3,3,4,05:39 AM,06:38 PM,No moonrise,03:07 PM,Waning Crescent,33,0.983488
20284,Pakistan,Islamabad,33.7,73.17,Asia/Karachi,1724932800,2024-08-29 17:00,26.6,79.9,Patchy light drizzle,...,40.6,2,3,05:40 AM,06:36 PM,12:40 AM,04:00 PM,Waning Crescent,24,3.710432


Predicting Temperature from Personal Model inputs given live from openWeatherTemperature API based on input parameters not on location

In [9]:
import pandas as pd
import h5py
import requests
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Load dataset
df = pd.read_csv('/content/GlobalWeatherRepository.csv')

# Filter the relevant features
df = df[['temperature_celsius', 'condition_text', 'humidity', 'wind_mph']]

# Convert wind_mph to wind_speed in seconds (1 mph = 0.44704 m/s)
df['wind_speed_sec'] = df['wind_mph'] * 0.44704

# Drop the original wind_mph column
df = df.drop(columns=['wind_mph'])

# Define the features and target
X = df[['condition_text', 'humidity', 'wind_speed_sec']]
y = df['temperature_celsius']

# Identify categorical and numerical columns
categorical_cols = ['condition_text']
numerical_cols = ['humidity', 'wind_speed_sec']

# Define pipelines
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))
])

categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, numerical_cols),
        ('cat', categorical_pipeline, categorical_cols)
    ])

# Apply preprocessing
X_preprocessed = preprocessor.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Save the scaler and model
joblib.dump(preprocessor, 'preprocessor.pkl')
joblib.dump(model, 'model.pkl')

# Fetch real-time weather data from OpenWeather API
api_key = 'e80244e0c610f8310e79ba9af659cba0'
city_name = 'Islamabad,PK'
base_url = f'https://api.openweathermap.org/data/2.5/weather?q={city_name}&appid={api_key}&units=metric'

response = requests.get(base_url)
if response.status_code == 200:
    data = response.json()
    api_condition = data['weather'][0]['description']
    api_humidity = data['main']['humidity']
    api_wind_speed_sec = data['wind']['speed']  # Already in m/s
    real_time_temperature = data['main']['temp']

    # Prepare data for model prediction
    api_data = pd.DataFrame({
        'condition_text': [api_condition],
        'humidity': [api_humidity],
        'wind_speed_sec': [api_wind_speed_sec]
    })

    # Preprocess the API data
    api_data_preprocessed = preprocessor.transform(api_data)

    # Predict using the trained model
    predicted_temperature = model.predict(api_data_preprocessed)[0]

    # Compare predictions with real-time temperature
    print(f"Real-time temperature from API: {real_time_temperature}°C")
    print(f"Predicted temperature from model: {predicted_temperature}°C")
    print(f"Difference: {abs(predicted_temperature - real_time_temperature)}°C")

else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")
    real_time_temperature = None


Real-time temperature from API: 33.24°C
Predicted temperature from model: 24.78383300200693°C
Difference: 8.456166997993073°C


Applied Different Techniques to Improve model Accuracy

In [10]:
import pandas as pd
import h5py
import requests
import joblib
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load dataset
df = pd.read_csv('/content/GlobalWeatherRepository.csv')

# Filter the relevant features
df = df[['temperature_celsius', 'condition_text', 'humidity', 'wind_mph']]

# Convert wind_mph to wind_speed in m/s (1 mph = 0.44704 m/s)
df['wind_speed_mps'] = df['wind_mph'] * 0.44704

# Drop the original wind_mph column
df = df.drop(columns=['wind_mph'])

# Define the features and target
X = df[['condition_text', 'humidity', 'wind_speed_mps']]
y = df['temperature_celsius']

# Identify categorical and numerical columns
categorical_cols = ['condition_text']
numerical_cols = ['humidity', 'wind_speed_mps']

# Define preprocessing pipelines
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, numerical_cols),
        ('cat', categorical_pipeline, categorical_cols)
    ])

# Apply preprocessing and model training within a pipeline
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'regressor__n_estimators': [100, 200],
    'regressor__max_depth': [None, 10, 20],
    'regressor__min_samples_split': [2, 5],
    'regressor__min_samples_leaf': [1, 2]
}

grid_search = GridSearchCV(model_pipeline, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best model from GridSearch
best_model = grid_search.best_estimator_

# Evaluate the model
y_pred = best_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Model Performance on Test Set:")
print(f"RMSE: {rmse:.2f}°C")
print(f"MAE: {mae:.2f}°C")
print(f"R² Score: {r2:.2f}")

# Save the preprocessor and model
joblib.dump(best_model, 'best_model.pkl')

# Fetch real-time weather data from OpenWeatherMap API
api_key = 'e80244e0c610f8310e79ba9af659cba0'
city_name = 'Islamabad,PK'
base_url = f'https://api.openweathermap.org/data/2.5/weather?q={city_name}&appid={api_key}&units=metric'

response = requests.get(base_url)
if response.status_code == 200:
    data = response.json()
    api_condition = data['weather'][0]['description']
    api_humidity = data['main']['humidity']
    api_wind_speed_mps = data['wind']['speed']  # Already in m/s
    real_time_temperature = data['main']['temp']

    # Prepare data for model prediction
    api_data = pd.DataFrame({
        'condition_text': [api_condition],
        'humidity': [api_humidity],
        'wind_speed_mps': [api_wind_speed_mps]
    })

    # Load the trained model
    best_model = joblib.load('best_model.pkl')

    # Predict using the trained model
    predicted_temperature = best_model.predict(api_data)[0]

    # Compare predictions with real-time temperature
    print(f"\nReal-time temperature from API: {real_time_temperature}°C")
    print(f"Predicted temperature from model: {predicted_temperature:.2f}°C")
    print(f"Difference: {abs(predicted_temperature - real_time_temperature):.2f}°C")

else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")
    print("Response content:", response.text)


Model Performance on Test Set:
RMSE: 5.40°C
MAE: 4.15°C
R² Score: 0.41

Real-time temperature from API: 32.69°C
Predicted temperature from model: 24.15°C
Difference: 8.54°C
