In [None]:
import pandas as pd

# Read the raw CSV export into a DataFrame; later cells refer to it as `data`.
csv_path = "HomeC.csv"
data = pd.read_csv(csv_path)

# Sanity check: print the first rows (DataFrame.head() returns five by default).
first_rows = data.head()
print(first_rows)


In [None]:
def row_to_document(row):
    """Describe one record as a short English paragraph.

    Args:
        row: Any object indexable by column name (the notebook passes each
            DataFrame row). It must provide the keys 'time', 'use [kW]',
            'gen [kW]', 'Dishwasher [kW]', 'Fridge [kW]', 'Microwave [kW]',
            'summary', 'temperature', 'humidity' and 'windSpeed'.

    Returns:
        str: Four sentences covering overall usage/generation, three
        per-appliance readings, the weather fields and the wind speed.
    """
    # Each sentence is formatted on its own and the pieces are concatenated at
    # the end; values are interpolated with their default formatting.
    totals = "At {}, the house used {} kW and generated {} kW. ".format(
        row['time'], row['use [kW]'], row['gen [kW]']
    )
    appliances = (
        "Room-specific usage: dishwasher {} kW, fridge {} kW, microwave {} kW. "
    ).format(row['Dishwasher [kW]'], row['Fridge [kW]'], row['Microwave [kW]'])
    weather = (
        "Weather conditions were {} with a temperature of {}°C and humidity at {}%. "
    ).format(row['summary'], row['temperature'], row['humidity'])
    wind = "Wind speed was {} km/h.".format(row['windSpeed'])
    return totals + appliances + weather + wind

In [None]:
# Convert every row of `data` into its text description (one string per row).
documents = data.apply(row_to_document, axis=1).tolist()

# Show only a small sample: echoing the whole list would write one line per
# dataset row into the notebook output.
documents[:5]

In [None]:
import json

# Prepare fine-tuning data with prompts and responses.
# `documents` was built row-by-row from `data`, so the two align by position;
# fail early if that no longer holds (e.g. after re-running cells out of order).
if len(documents) != len(data):
    raise ValueError(
        f"documents ({len(documents)}) and data ({len(data)}) are out of sync"
    )

# Each response is filled in from the same row its prompt describes, so the
# training target differs per example instead of being a constant template
# with literal "X", "Y" and "Z" placeholders.
fine_tuning_data = [
    {
        "prompt": f"Analyze the following data and summarize: {doc}",
        "response": (
            f"The house used {used} kW, generated {generated} kW, "
            f"and weather conditions were {summary}."
        ),
    }
    for doc, used, generated, summary in zip(
        documents, data["use [kW]"], data["gen [kW]"], data["summary"]
    )
]

# Save as JSON (explicit encoding so the result does not depend on the
# platform default).
with open("fine_tuning_data.json", "w", encoding="utf-8") as f:
    json.dump(fine_tuning_data, f, indent=2)


In [None]:
!pip install transformers datasets


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# Checkpoint to fine-tune.
model_name = "gpt2"  # Replace with your preferred model

# Fetch the tokenizer and the pretrained causal-LM weights for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Read the prompt/response pairs written by the earlier cell.
training_file = "fine_tuning_data.json"
dataset = load_dataset("json", data_files=training_file)

# GPT-2 style tokenizers ship without a padding token, and padding="max_length"
# fails unless one is set; reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


# Tokenize the dataset
def tokenize_data(example):
    """Tokenize prompt+response text and attach causal-LM labels.

    Args:
        example: Either a single record (``prompt``/``response`` are strings)
            or a batch as passed by ``Dataset.map(..., batched=True)``
            (``prompt``/``response`` are equal-length lists of strings).

    Returns:
        The tokenizer output with an added ``labels`` entry: a copy of
        ``input_ids`` in which padded positions (attention mask 0) are
        replaced by -100 so the loss ignores them.
    """
    prompts, responses = example["prompt"], example["response"]
    single = isinstance(prompts, str)
    if single:
        prompts, responses = [prompts], [responses]

    # Join each prompt with its own response. On a batch, `prompts + responses`
    # would concatenate the two lists and yield twice as many rows as the batch.
    texts = [prompt + response for prompt, response in zip(prompts, responses)]
    # NOTE: no max_length is given, so sequences are padded/truncated to the
    # tokenizer's own maximum length.
    encoded = tokenizer(texts, truncation=True, padding="max_length")

    # A causal-LM model only returns a loss when `labels` are supplied.
    encoded["labels"] = [
        [token if keep else -100 for token, keep in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]

    if single:
        return {key: values[0] for key, values in encoded.items()}
    return encoded


tokenized_dataset = dataset.map(tokenize_data, batched=True)

import inspect

# Hold out part of the data for evaluation: step-wise evaluation is enabled
# below, and it cannot run without an eval dataset. The seed keeps the split
# reproducible across runs; adjust test_size to trade evaluation cost for
# coverage.
split_dataset = tokenized_dataset["train"].train_test_split(test_size=0.1, seed=42)

# Newer transformers releases call this argument `eval_strategy`, older ones
# `evaluation_strategy`; use whichever keyword the installed version accepts.
_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
eval_strategy_arg = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

# Fine-tuning configuration
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    num_train_epochs=3,
    save_steps=500,
    **{eval_strategy_arg: "steps"},
)

# Fine-tune the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=split_dataset["train"],
    eval_dataset=split_dataset["test"],
)

trainer.train()
