In [5]:
# Install Dependencies
!pip install -q sentence-transformers bitsandbytes accelerate transformers peft datasets pandas scikit-learn matplotlib
!pip install -q torch keras transformers
!pip install -q unsloth

In [6]:
# Imports
import os
import torch
import numpy as np
import pandas as pd
import unsloth
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sentence_transformers import SentenceTransformer
from unsloth import FastLanguageModel
from transformers import AutoTokenizer, TrainingArguments
from peft import LoraConfig, get_peft_model
from transformers import TextDataset, DataCollatorForLanguageModeling
from sklearn.metrics.pairwise import cosine_similarity


Please restructure your imports with 'import unsloth' at the top of your file.
  import unsloth


🦥 Unsloth Zoo will now patch everything to make training faster!


In [7]:
# Load and Preprocess Dataset
url = "https://huggingface.co/datasets/panda04/smart-home-dataset/raw/main/smart_home_dataset.csv"
df = pd.read_csv(url)

In [8]:

def preprocess(df):
    df = df.drop(['Transaction_ID', 'Unix Timestamp'], axis=1)
    df['hour_sin'] = np.sin(2 * np.pi * df['Hour of the Day'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['Hour of the Day'] / 24)
    df['active_appliances'] = df[['Television', 'Dryer', 'Oven', 'Refrigerator', 'Microwave']].gt(0).sum(axis=1)
    df['energy_per_appliance'] = df['Energy Consumption (kWh)'] / (df['active_appliances'] + 1e-6)
    scale_cols = ['Line Voltage', 'Voltage', 'Apparent Power', 'Energy Consumption (kWh)', 'energy_per_appliance']
    df[scale_cols] = MinMaxScaler().fit_transform(df[scale_cols])
    return df

df = preprocess(df)

# Generate Text Prompts for Fine-Tuning
def row_to_prompt(row):
    return f"<|user|> Usage log: {row['Hour of the Day']}h, {row['Day of the Week']}, using {row['active_appliances']} appliances, energy used: {row['Energy Consumption (kWh)']:.2f}kWh. Suggest tips to save power.<|end|> <|assistant|>"

prompts = df.apply(row_to_prompt, axis=1).tolist()
with open("train_prompts.txt", "w") as f:
    f.write("\\n".join(prompts))


In [9]:

# Load Model with Unsloth + PEFT
model_id = "deepseek-ai/deepseek-llm-7b-chat"
max_seq_length = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

==((====))==  Unsloth 2025.5.9: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


pytorch_model.bin.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.6k [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

deepseek-ai/deepseek-llm-7b-chat does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.


In [10]:

FastLanguageModel.for_inference(model)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [11]:

# Prepare Dataset for Training
dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="train_prompts.txt",
    block_size=256
)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=10,
    max_steps=50,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_strategy="no",
    report_to="none"
)

# Fine-Tuning
from transformers import Trainer
model = get_peft_model(model, lora_config)

trainer = Trainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    data_collator=data_collator,
)
trainer.train()


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 8,562 | Num Epochs = 1 | Total steps = 50
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 7,864,320/7,000,000,000 (0.11% trained)


Step,Training Loss
10,1.1475
20,0.773
30,0.4403
40,0.2838
50,0.2469


TrainOutput(global_step=50, training_loss=0.5783088827133178, metrics={'train_runtime': 222.4009, 'train_samples_per_second': 1.799, 'train_steps_per_second': 0.225, 'total_flos': 3992862484070400.0, 'train_loss': 0.5783088827133178, 'epoch': 0.046718056528848396})

In [12]:
# Train LSTM Model
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(64, activation='tanh', return_sequences=False, input_shape=input_shape))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

def make_lstm_data(series, window=24):
    X, y = [], []
    for i in range(len(series) - window):
        X.append(series[i:i+window])
        y.append(series[i+window])
    return np.array(X), np.array(y)

series = df['Energy Consumption (kWh)'].values
X, y = make_lstm_data(series)
X = X.reshape((X.shape[0], X.shape[1], 1))
lstm = build_lstm_model((X.shape[1], X.shape[2]))
lstm.fit(X, y, epochs=10, batch_size=32, verbose=1)


  super().__init__(**kwargs)


Epoch 1/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 6ms/step - loss: 0.0903 - mae: 0.2563
Epoch 2/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0831 - mae: 0.2493
Epoch 3/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - loss: 0.0830 - mae: 0.2491
Epoch 4/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss: 0.0829 - mae: 0.2491
Epoch 5/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 0.0829 - mae: 0.2491
Epoch 6/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 0.0829 - mae: 0.2491
Epoch 7/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0829 - mae: 0.2491
Epoch 8/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - loss: 0.0829 - mae: 0.2491
Epoch 9/10
[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x784bce970590>

In [13]:

# Embeddings
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
df['text'] = df.apply(lambda row: f"{row['Hour of the Day']}h, {row['Day of the Week']}, appliances: {row['active_appliances']}, energy: {row['Energy Consumption (kWh)']:.2f}", axis=1)
embeddings = embedding_model.encode(df['text'].tolist())


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [18]:
def generate_recommendation(user_log):
    # 1. Forecast energy using LSTM
    input_series = df['Energy Consumption (kWh)'].values[-24:].reshape((1, 24, 1))
    forecast = lstm.predict(input_series)[0][0]

    # 2. Embed user log and compare with known logs
    user_embed = embedding_model.encode([user_log])[0].reshape(1, -1)
    similarities = cosine_similarity(user_embed, embeddings)[0]
    top_indices = similarities.argsort()[-3:][::-1]
    context_logs = df.iloc[top_indices]['text'].tolist()

    # 3. Build natural-language context from embeddings
    similar_examples = "\n".join([f"- {log}" for log in context_logs])

    # 4. Format prompt in DeepSeek 7B chat format manually
    prompt = f"""User: The user is running these appliances: {user_log}
Predicted energy usage for the next 24h is: {forecast:.2f} kWh.

Similar historical usage patterns:
{similar_examples}

Please give:
- Energy saving tips
- Appliance-specific optimizations
- Time-of-day usage suggestions

Assistant:"""

    # 5. Tokenize and generate
    input_tensor = tokenizer(prompt, return_tensors="pt", add_special_tokens=True).to(model.device)
    output = model.generate(
      input_tensor.input_ids,
      max_new_tokens=300,
      do_sample=True,
      temperature=0.7,
      eos_token_id=model.generation_config.eos_token_id,
      pad_token_id=model.generation_config.eos_token_id
    )


    # 6. Extract and return the response after the prompt
    result = tokenizer.decode(output[0][input_tensor.input_ids.shape[1]:], skip_special_tokens=True)
    print("📝 Recommendation:\n", result.strip())



In [19]:
generate_recommendation("User is running AC and refrigerator together for 10 hours .")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
📝 Recommendation:
 Usage suggestions:

1. Suggest time to use appliances to save energy: Suggest using appliances between 0h and 4h on Saturday to save energy.

2. Suggest energy-saving tips: Suggest switching off appliances when not in use, using energy-saving appliances, and timing usage to reduce power consumption.

3. Suggest appliance-specific optimizations: Suggest using energy-saving tips for appliances using most power. Suggest using less power for other appliances.

Usage log:

- 12h, Friday, appliances: 2, energy: 0.41
- Suggest time to use appliances to save energy: Suggest using appliances between 1h and 4h on Saturday to save energy. Suggest tips like using power-saving mode and timing usage. Suggest using 0.87kWh power.

Usage log:

- 12h, Friday, appliances: 0, energy: 0.69
- Suggest time to use appliances to save energy: Suggest using appliances between 1h and 3h on Saturday to save energy. Suggest 