In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import os

ModuleNotFoundError: No module named 'sklearn'

In [None]:
# 1. Load data
# All input CSVs are resolved relative to the current working directory.
required_files = [
    "umsatzdaten_gekuerzt.csv",
    "kiwo.csv",
    "wetter.csv",
    "Data Import and Preparation/feiertage.csv",
]

# Fail early on the first missing input, before any file is read.
first_missing = next((path for path in required_files if not os.path.exists(path)), None)
if first_missing is not None:
    raise FileNotFoundError(f"Datei '{first_missing}' nicht gefunden.")

# Read the files in the same order as listed above.
umsatzdaten, kiwo_data, wetter_data, feiertage = (
    pd.read_csv(path) for path in required_files
)

In [None]:
# 2. Parse the date columns
# Unparseable values become NaT (errors='coerce') instead of raising.
for frame in (umsatzdaten, kiwo_data, wetter_data):
    frame['Datum'] = pd.to_datetime(frame['Datum'], errors='coerce')

# Holiday dates get the year 2017 appended before parsing as "<day>. <month abbrev> <year>".
# NOTE(review): the year is hard-coded, so every holiday lands in 2017 - confirm this is
# intended, since later cells filter on dates from 2013 to 2018.
# NOTE(review): '%b' presumably depends on the active locale for month abbreviations;
# entries that do not match are silently turned into NaT - verify against the CSV.
holiday_text_with_year = feiertage['Datum'].map(lambda raw: f"{raw.strip()} 2017")
feiertage['Datum'] = pd.to_datetime(holiday_text_with_year, format='%d. %b %Y', errors='coerce')

In [None]:
# 3. Combine the sources
# Left joins on 'Datum' keep every sales row; dates absent from the other tables
# yield NaN in the joined columns. Only 'Datum' and 'Name' are taken from the holidays.
merged_data = (
    umsatzdaten
    .merge(kiwo_data, how='left', on='Datum')
    .merge(wetter_data, how='left', on='Datum')
    .merge(feiertage.loc[:, ['Datum', 'Name']], how='left', on='Datum')
)

In [None]:
# 4. Derive additional variables
# Calendar features are all read from the parsed 'Datum' column.
datum = merged_data['Datum']
merged_data['Wochentag'] = datum.dt.day_name()
merged_data['Wochentag_Nummer'] = datum.dt.weekday
merged_data['Monat'] = datum.dt.month
merged_data['Quartal'] = datum.dt.quarter

# 0/1 flags: weekend is decided by the day name, holiday by whether the joined
# holiday 'Name' is present for that row.
merged_data['Ist_Wochenende'] = merged_data['Wochentag'].isin(('Saturday', 'Sunday')).astype(int)
merged_data['Ist_Feiertag'] = merged_data['Name'].notna().astype(int)

def categorize_temperature(temp):
    """Map a temperature value to a coarse category label.

    Args:
        temp: Numeric temperature (presumably degrees Celsius - confirm against
            the weather data), or a missing value (None / NaN / pd.NA).

    Returns:
        "Kalt" for values below 10, "Mild" for values from 10 up to (but not
        including) 20, "Warm" for values of 20 and above, and None when the
        temperature is missing.
    """
    # NaN fails every comparison, so without this guard a missing temperature
    # would fall through to the final branch and be labelled "Warm".
    if pd.isna(temp):
        return None
    if temp < 10:
        return "Kalt"
    if temp < 20:
        return "Mild"
    return "Warm"

# Label each row's temperature with its category.
merged_data['Temperatur_Kategorie'] = merged_data['Temperatur'].apply(categorize_temperature)

# 'Feiertagsnaehe' is 1 when the row is a holiday itself or lies exactly one day
# after or before a date listed in the holiday table, else 0.
holiday_dates = feiertage['Datum']
one_day = pd.Timedelta(days=1)
is_holiday = merged_data['Ist_Feiertag'].eq(1)
is_day_after_holiday = merged_data['Datum'].sub(one_day).isin(holiday_dates)
is_day_before_holiday = merged_data['Datum'].add(one_day).isin(holiday_dates)
merged_data['Feiertagsnaehe'] = (
    is_holiday | is_day_after_holiday | is_day_before_holiday
).astype(int)

# Mean of 'Umsatz' over the current row and up to six preceding rows.
# NOTE(review): the window counts rows in the frame's current order, not calendar
# days - confirm the frame is sorted by 'Datum' with one row per day. The value also
# includes the current row's own 'Umsatz'.
merged_data['Umsatz_Trend_7_Tage'] = merged_data['Umsatz'].rolling(7, min_periods=1).mean()

In [None]:
# 5. Split the dataset by date
# Both windows are inclusive at both ends; rows outside them (or with NaT dates)
# end up in neither subset.
in_train_window = merged_data['Datum'].between('2013-07-01', '2017-07-31')
in_validation_window = merged_data['Datum'].between('2017-08-01', '2018-07-31')

train_data = merged_data.loc[in_train_window]
validation_data = merged_data.loc[in_validation_window]

In [None]:
# 6. Fit the linear baseline model
features = ['Wochentag_Nummer', 'Feiertagsnaehe', 'Temperatur', 'Ist_Wochenende']

# LinearRegression.fit raises on NaN input. The sources were combined with left
# joins, so feature columns (presumably 'Temperatur' from the weather table) can be
# missing for some dates; fit on rows where all features and the target are present.
train_complete = train_data.dropna(subset=features + ['Umsatz'])
dropped_rows = len(train_data) - len(train_complete)
if dropped_rows:
    print(f"{dropped_rows} Trainingszeilen mit fehlenden Werten wurden ausgeschlossen.")
if train_complete.empty:
    raise ValueError("Keine vollständigen Trainingszeilen für das Modell vorhanden.")

X_train = train_complete[features]
y_train = train_complete['Umsatz']

model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# 7. Compute the adjusted R^2
# R^2 is scored on the same data the model was fitted on (training set).
r2 = model.score(X_train, y_train)

# n = number of observations, p = number of predictors.
n = y_train.shape[0]
p = len(X_train.columns)

# Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1)
unexplained_share = 1 - r2
adj_r2 = 1 - (unexplained_share * (n - 1) / (n - p - 1))

# Report both figures
print("R²:", r2)
print("Adjustiertes R²:", adj_r2)

In [None]:
# 8. Persist the results
output_dir = "Baseline Model"
# exist_ok avoids the check-then-create race and makes the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

# Write the metrics. The text contains the non-ASCII character "²", so the
# encoding is set explicitly instead of relying on the platform default.
with open(os.path.join(output_dir, "results.txt"), "w", encoding="utf-8") as f:
    f.write(f"R²: {r2}\n")
    f.write(f"Adjustiertes R²: {adj_r2}\n")

# Store both data subsets next to the metrics, without the index column.
train_data.to_csv(os.path.join(output_dir, "train_data.csv"), index=False)
validation_data.to_csv(os.path.join(output_dir, "validation_data.csv"), index=False)

print("Berechnungen abgeschlossen und Ergebnisse gespeichert.")