In [17]:
import joblib
import pandas as pd
import numpy as np

# Load the pre-trained model
model_filename = 'random_forest_model.pkl'
rf_loaded = joblib.load(model_filename)

# Load new data for prediction
new_data = pd.read_csv('testreal.csv')

# Print actual target values before dropping them
print("Actual data:")
print(new_data[['RT', 'CH Load', 'GPM', 'DeltaCHW', 'CHWS', 'CHWR']])  # Access actual values from new_data

# Drop the actual target columns from the new data
new_x = new_data.drop(columns=['CH Load', 'RT', 'GPM', 'DeltaCHW', 'CHWS', 'CHWR'])

# If necessary, rename columns to match the model's training data
new_x.rename(columns={
    'Temperature [?C]': 'Temperature [øC]'  # Change as necessary
}, inplace=True)

# Make predictions
predictions = rf_loaded.predict(new_x)

# Custom headers for the predicted values
custom_headers = ['RT', 'CH Load', 'GPM', 'DeltaCHW', 'CHWS', 'CHWR']

# Create a DataFrame for the predictions
predictions_df = pd.DataFrame(predictions, columns=custom_headers)

# Display the results with headers
print("Prediction Data:")
print(predictions_df)


Actual data:
      RT  CH Load     GPM  DeltaCHW  CHWS  CHWR
0  319.2     54.4  1367.9       5.6  46.3  51.9
1  244.8     45.3  1335.5       4.4  47.1  51.5
2  268.5     42.9  1342.5       4.8  47.0  51.8
Prediction Data:
        RT  CH Load       GPM  DeltaCHW    CHWS    CHWR
0  325.784   55.415  1369.001     5.711  46.445  52.156
1  243.729   45.455  1337.829     4.373  47.164  51.537
2  270.416   43.009  1347.020     4.818  46.921  51.739


In [36]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import joblib

# Function to fetch weather data for a specific time slot
def fetch_weather_data(api_key, date_str, start_hour, end_hour):
    city = "Vellore"
    url = f"http://api.weatherstack.com/current?access_key={api_key}&query={city}&units=m"

    try:
        response = requests.get(url)
        data = response.json()

        if response.status_code == 200 and 'current' in data:
            current_temp = data['current']['temperature']
            humidity = data['current']['humidity']
            wet_bulb_temp = current_temp - (humidity / 100) * 5

            # Create a DataFrame with hourly weather data for the entire day
            start_time = datetime.strptime(date_str, "%Y-%m-%d")
            timestamps = [start_time + timedelta(hours=i) for i in range(24)]
            weather_data = pd.DataFrame({
                "Timestamps": timestamps,
                "Temperature": [current_temp] * len(timestamps),
                "RH": [humidity] * len(timestamps),
                "WBT_C": [wet_bulb_temp] * len(timestamps)
            })

            # Filter for the specific time slot
            filtered_data = weather_data[(weather_data['Timestamps'].dt.hour >= start_hour) & 
                                         (weather_data['Timestamps'].dt.hour < end_hour)]

            return filtered_data.iloc[0]  # Return the first row for the selected time slot
        else:
            raise ValueError(f"Failed to fetch weather data. Response code: {response.status_code}")
    except Exception as e:
        print(f"Error fetching weather data: {e}")
        return None

# Function to predict load capacity based on operational metrics and weather data
def predict_load_capacity(api_key, date_str, start_hour, end_hour, hotel_occupancy, operational_metrics):
    # Fetch weather data for the specific time slot
    weather_data = fetch_weather_data(api_key, date_str, start_hour, end_hour)
    
    if weather_data is None:
        return None

    # Load the pre-trained model
    model_filename = 'random_forest_model.pkl'
    rf_loaded = joblib.load(model_filename)

    # Extract the feature names the model was trained with
    model_features = rf_loaded.feature_names_in_  # This will give us the order of features used during training

    # Extract timestamp components (year, month, day, hour, minute) from the weather data
    timestamp = weather_data['Timestamps']
    year = timestamp.year
    month = timestamp.month
    day = timestamp.day
    hour = timestamp.hour
    minute = timestamp.minute

    # Add weekday to the input data
    input_date = datetime.strptime(date_str, "%Y-%m-%d")
    weekday = input_date.weekday()  # Returns an integer (0 = Monday, 6 = Sunday)

    # Add timestamp components and weekday to the operational metrics
    operational_metrics['year'] = year
    operational_metrics['month'] = month
    operational_metrics['day'] = day
    operational_metrics['hour'] = hour
    operational_metrics['minute'] = minute
    operational_metrics['weekday'] = weekday  # Add weekday (0-6) as a feature

    # Add weather data to operational metrics
    operational_metrics['Temperature'] = weather_data['Temperature']
    operational_metrics['RH'] = weather_data['RH']
    operational_metrics['WBT_C'] = weather_data['WBT_C']

    # Add hotel occupancy to the operational metrics
    operational_metrics['Hotel_Occupancy'] = hotel_occupancy

    # Convert the operational metrics to a DataFrame
    input_data = pd.DataFrame([operational_metrics])

    # Reorder the columns to match the model's feature order
    input_data = input_data[model_features]  # Reordering columns to match model feature order

    # Make predictions
    predictions = rf_loaded.predict(input_data)

    # Custom headers for the predicted values (adjust based on model's expected output)
    custom_headers = ['RT', 'CH Load', 'GPM', 'DeltaCHW', 'CHWS', 'CHWR']



    

    # Create a DataFrame for the predictions
    predictions_df = pd.DataFrame(predictions, columns=custom_headers)

    return predictions_df

# Example usage:
api_key = "5460889761385624e130f4ee0e0ad810" 
date_input = input("Enter the date (YYYY-MM-DD): ")
start_hour = int(input("Enter the start hour for the time slot (24-hour format): "))
end_hour = start_hour + 1  # End hour is the next hour
hotel_occupancy = float(input("Enter the hotel occupancy percentage: "))

# Example operational metrics (adjusted to match the actual headers with space corrections)
operational_metrics_data = {
    'kW_Tot': 260.2,
    'kW_RT': 0.815,
    'CH1': 1,
    'CH2': 0,
    'CH3': 0,
    'CH4': 0,
    'kW_CHH': 184.5,
    'kW_CHP': 24.3,
    'kW_CHS': 0,
    'kW_CDS': 31.6,
    'kW_CT': 19.8,
    'DeltaCDW': 5.6,
    'CDHI': 87.6,
    'CDLO': 82,
    'WBT': 76.1,   # Wet Bulb Temperature (°C)
    'DeltaCT': -5.9,
    'Hz_ CHP': 48,  # Ensure this matches the feature name in the model
    'Hz_CHS': 0,
    'Hz_CDS': 47,
    'Hz_CT': 48,
    'Precent_ CHP': 9.3,  # Ensure this matches the feature name in the model
    'Precent_CH': 70.9,
    'Precent_CDS': 12.2,
    'Precent_CT': 7.6,
}

# Call the function to predict load capacity
predicted_load = predict_load_capacity(api_key, date_input, start_hour, end_hour, hotel_occupancy, operational_metrics_data)
print(predicted_load)


Enter the date (YYYY-MM-DD):  2024-09-28
Enter the start hour for the time slot (24-hour format):  11
Enter the hotel occupancy percentage:  95


Model expected features: ['kW_Tot' 'kW_RT' 'CH1' 'CH2' 'CH3' 'CH4' 'kW_CHH' 'kW_CHP' 'kW_CHS'
 'kW_CDS' 'kW_CT' 'DeltaCDW' 'CDHI' 'CDLO' 'WBT' 'DeltaCT' 'Hz_ CHP'
 'Hz_CHS' 'Hz_CDS' 'Hz_CT' 'Precent_CH' 'Precent_ CHP' 'Precent_CDS'
 'Precent_CT' 'RH' 'Temperature' 'WBT_C' 'Hotel_Occupancy' 'weekday'
 'year' 'month' 'day' 'hour' 'minute']
Input data features: Index(['kW_Tot', 'kW_RT', 'CH1', 'CH2', 'CH3', 'CH4', 'kW_CHH', 'kW_CHP',
       'kW_CHS', 'kW_CDS', 'kW_CT', 'DeltaCDW', 'CDHI', 'CDLO', 'WBT',
       'DeltaCT', 'Hz_ CHP', 'Hz_CHS', 'Hz_CDS', 'Hz_CT', 'Precent_CH',
       'Precent_ CHP', 'Precent_CDS', 'Precent_CT', 'RH', 'Temperature',
       'WBT_C', 'Hotel_Occupancy', 'weekday', 'year', 'month', 'day', 'hour',
       'minute'],
      dtype='object')
        RT  CH Load       GPM  DeltaCHW    CHWS    CHWR
0  325.262   55.474  1373.271     5.684  46.427  52.111


In [32]:
# Check if all model features are present in the input data
model_features = rf_loaded.feature_names_in_ 

missing_features = [feature for feature in model_features if feature not in input_data.columns]
extra_features = [feature for feature in input_data.columns if feature not in model_features]

if missing_features:
    print(f"Missing features: {missing_features}")
if extra_features:
    print(f"Extra features: {extra_features}")

NameError: name 'input_data' is not defined