In [101]:
import requests
import pandas as pd
from sklearn.linear_model import LinearRegression

# Fetch the data
# Training-cycle records for the `ou_bedroom` device are served as JSON.
url = "http://62.72.58.117:3000/training-cycles/ou_bedroom"

# A timeout keeps the notebook from hanging indefinitely if the server is
# unreachable; raise_for_status() turns an HTTP error response into an
# exception here instead of letting an error payload flow into the DataFrame.
response = requests.get(url, timeout=10)
response.raise_for_status()
data = response.json()

df = pd.DataFrame(data)

# Fail early with a clear message: every later cell assumes at least one row
# and the columns shown in the preview below.
if df.empty:
    raise ValueError(f"No training cycles returned from {url}")

print(df.head())


                        _id   device_id  temp_start  humi_start  target_temp  \
0  6637de928828c818b21efc28  ou_bedroom   30.550724   57.054836           30   
1  6637e8af8828c818b21efc32  ou_bedroom   30.400000   61.529984           30   
2  6637ed408828c818b21efc3a  ou_bedroom   31.700001   61.426476           30   
3  6637f0217cf1520579764f26  ou_bedroom   32.133331   53.228130           30   
4  6637fe597cf1520579764f2a  ou_bedroom   37.879707   48.116722           30   

   time_use  time_start   humi_end  __v  
0      1342  1714936147  50.138153    0  
1       150  1714939929  60.726280    0  
2       561  1714940687  57.063969    0  
3       380  1714941605  54.241890    0  
4       891  1714944734  52.266129    0  


In [102]:
# Data Cleaning
# Remove the record id, device id and version columns; they are not used
# for modeling.
df_cleaned = df.drop(columns=['_id', 'device_id', '__v'])

# Re-base the Unix timestamps so the earliest record starts at 0 and every
# other row holds its offset in seconds from that record.
min_time = df_cleaned['time_start'].min()
df_cleaned['time_start'] = df_cleaned['time_start'].sub(min_time)

In [103]:
# Print the whole cleaned frame (not just the head) to inspect every row.
print(df_cleaned)

   temp_start  humi_start  target_temp  time_use  time_start   humi_end
0   30.550724   57.054836           30      1342           0  50.138153
1   30.400000   61.529984           30       150        3782  60.726280
2   31.700001   61.426476           30       561        4540  57.063969
3   32.133331   53.228130           30       380        5458  54.241890
4   37.879707   48.116722           30       891        8587  52.266129
5   32.265217   53.578228           30       570       11882  53.727398


In [104]:
# Count missing values per column of the cleaned frame.
print(df_cleaned.isna().sum())

temp_start     0
humi_start     0
target_temp    0
time_use       0
time_start     0
humi_end       0
dtype: int64


In [105]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
import numpy as np

# Data Preparation
# Features and target are selected from `df`; none of the selected columns
# are altered by the cleaning step, so the values match `df_cleaned`.
# NOTE(review): in the rows printed above `target_temp` is always 30, so it
# carries no information for the regression on that data - confirm whether
# it varies in the full dataset.
X = df[['temp_start', 'humi_start', 'target_temp']]  # Updated feature set
y = df['time_use']

# Normalize features
scaler = MinMaxScaler()

# Setup the model with normalization. Keeping the scaler inside the pipeline
# means it is fitted together with the regressor on whatever data `fit` sees.
model = make_pipeline(scaler, LinearRegression())

# Use cross-validation to evaluate the model.
# The dataset comes from a live endpoint and is tiny, so cap the number of
# folds at the number of rows; a fixed cv=5 raises when fewer than 5 rows
# are returned.
n_folds = min(5, len(X))
scores = cross_val_score(model, X, y, cv=n_folds, scoring='neg_mean_squared_error')

# The scorer returns negated MSE, so flip the sign to report a positive error.
mean_mse = np.mean(scores) * -1
print(f'Mean MSE from cross-validation: {mean_mse}')

# Final fit on all rows; this is the model used by the following cells.
model.fit(X, y)



Mean MSE from cross-validation: 742139.3940230227


In [106]:
# One what-if scenario, with the same column names the model was fitted on:
# current temperature 32, current humidity 55, target temperature 30.
example = pd.DataFrame(
    [{'temp_start': 32, 'humi_start': 55, 'target_temp': 30}]
)
predicted_time = model.predict(example)

# Clamp at zero so a negative regression output is never reported as a duration.
print(f"Predicted time used: {max(0, predicted_time[0])} seconds")

Predicted time used: 690.6427170745217 seconds


In [107]:
# Round-trip check: save the fitted pipeline, load it back, and confirm the
# reloaded object still predicts.
import joblib

# Assuming 'model' is your trained model object
print("Model type before saving:", type(model))

# Now save the model
joblib.dump(model, 'trained_model.pkl')

# Load the model again to test
model = joblib.load('trained_model.pkl')
print("Model type after loading:", type(model))

# Test prediction
# The pipeline was fitted on a DataFrame, so pass a DataFrame with the same
# column names. A bare NumPy array makes scikit-learn warn about missing
# feature names and silently relies on the column order being right.
sample_data = pd.DataFrame(
    [[32, 55, 30]],
    columns=['temp_start', 'humi_start', 'target_temp'],
)
print("Sample prediction:", model.predict(sample_data))


Model type before saving: <class 'sklearn.pipeline.Pipeline'>
Model type after loading: <class 'sklearn.pipeline.Pipeline'>
Sample prediction: [690.64271707]




In [108]:
import joblib

# Write the fitted pipeline to 'trained_model.pkl'; the call's return value
# (the list of written file names) is displayed as the cell output.
joblib.dump(value=model, filename='trained_model.pkl')

['trained_model.pkl']

In [109]:
import pandas as pd

# Define different starting temperatures to see their effect on cooling time
temperatures = [30, 32, 35, 37, 40]  # Example temperatures in degrees Celsius
humidity = 55  # Constant humidity for simplicity
target_temp = 30  # Constant target temperature

# Load the model (ensure it's loaded correctly as shown in previous steps)
loaded_model = joblib.load('trained_model.pkl')

# Build every scenario as one frame (the scalar humidity/target values are
# broadcast to each row) and predict in a single call, instead of creating a
# one-row DataFrame and calling predict once per temperature.
scenarios = pd.DataFrame({
    'temp_start': temperatures,
    'humi_start': humidity,
    'target_temp': target_temp,
})
predicted_times = loaded_model.predict(scenarios)

results = [
    {
        'temp_start': temp,
        'predicted_time': max(0, seconds),  # Ensure no negative times
    }
    for temp, seconds in zip(temperatures, predicted_times)
]

# Display the results
# NOTE(review): in the run recorded in this notebook the predicted time goes
# DOWN as temp_start goes up, which is surprising for a time-to-reach-target
# estimate. The model was fitted on only a handful of rows, so sanity-check
# this trend before relying on the predictions.
for result in results:
    print(f"Predicted time for temp_start = {result['temp_start']}°C, humi_start = {humidity}%, target_temp = {target_temp}°C: {result['predicted_time']} seconds")


Predicted time for temp_start = 30°C, humi_start = 55%, target_temp = 30°C: 739.8337507597817 seconds
Predicted time for temp_start = 32°C, humi_start = 55%, target_temp = 30°C: 690.6427170745217 seconds
Predicted time for temp_start = 35°C, humi_start = 55%, target_temp = 30°C: 616.8561665466316 seconds
Predicted time for temp_start = 37°C, humi_start = 55%, target_temp = 30°C: 567.6651328613716 seconds
Predicted time for temp_start = 40°C, humi_start = 55%, target_temp = 30°C: 493.8785823334814 seconds
