In [None]:
import pandas as pd
# Load the complete bookings history; `df` is reused further down to build the test split.
df = pd.read_csv('data_for_1_model_all_year_bookings.csv')

# Add years you want to train the model on (both bounds are inclusive).
# NOTE(review): the window stops at 2023-12-30, so 2023-12-31 is in neither the
# training nor the test split — confirm whether '2023-12-31' was intended.
in_training_window = df['date'].between('2022-01-01', '2023-12-30')
df_train = df[in_training_window].copy()

# Compute the relative increase from bookings to actual visitors

In [None]:
# Relative increase from bookings to actual visitors: (visitors - bookings) / bookings.
# Computed column-wise instead of with a row-by-row apply. Rows whose bookings are
# zero, negative or missing get a factor of 0, which also covers division by zero.
train_bookings = df_train['bookings']
relative_increase = (df_train['visitors'] - train_bookings) / train_bookings
df_train['increase_factor'] = relative_increase.where(train_bookings > 0, 0)

# Drop every row that has a missing value in any column.
# NOTE(review): this also discards rows where an unrelated column is empty — confirm
# that is intended, or restrict the check with `subset=[...]`.
df_train = df_train.dropna()
print(df_train)

# Create a CSV of median booking increase factors, grouped by day_of_year and days_before (the number of days before the target day)

In [None]:
# Parse the dates once and derive the day number within the year that the
# increase factors are keyed on.
train_dates = pd.to_datetime(df_train['date'])
df_train['date'] = train_dates
df_train['day_of_year'] = train_dates.dt.dayofyear

# Despite its name, this holds the MEDIAN increase factor per
# (day_of_year, days_before) pair, rounded to two decimals.
average_increase_by_week_and_days_before = (
    df_train
    .groupby(['day_of_year', 'days_before'])['increase_factor']
    .median()
    .round(2)
)
print(average_increase_by_week_and_days_before)

# Persist the lookup table; the group keys are written as regular CSV columns.
average_increase_by_week_and_days_before.to_csv('daily_increases.csv')

# Test the simple statistics model

In [None]:
# Lookup table of median increase factors written by the training step.
increase_df = pd.read_csv('daily_increases.csv')

# Hold-out period: every row dated within 2024 (both bounds inclusive).
in_test_window = df['date'].between('2024-01-01', '2024-12-31')
df_test = df[in_test_window].copy()

# Derive the calendar features used to look up an increase factor.
# NOTE(review): 2024 is a leap year, so from March onward its day_of_year is one
# higher than for the same calendar date in the 2022/2023 training data, and
# day 366 has no counterpart there — confirm this misalignment is acceptable.
test_dates = pd.to_datetime(df_test['date'])
df_test['date'] = test_dates
df_test['day_of_year'] = test_dates.dt.dayofyear
df_test['day_of_month'] = test_dates.dt.day

In [None]:
def find_nearest_forward_match(target_day, available_days):
    """
    Find the closest day in ``available_days`` that is on or after ``target_day``.

    ``available_days`` must support boolean-mask indexing, ``.empty`` and
    ``.min()`` (for example a pandas Series).

    Returns the smallest value that is >= ``target_day``, or None when no such
    day exists.
    """
    candidates = available_days[available_days >= target_day]
    return None if candidates.empty else candidates.min()

def perform_statistical_predictions(row):
    """
    Predict the number of visitors for one row from its current bookings.

    The increase factor is looked up in the module-level ``increase_df`` for the
    row's ``days_before`` and ``day_of_year``. If that exact day has no entry,
    the nearest later day that has an entry for the SAME ``days_before`` is used.

    Parameters:
        row: mapping/Series with the keys 'day_of_year', 'days_before' and
            'bookings'.

    Returns:
        ``bookings * (1 + factor)`` with the factor rounded to a whole number,
        or the unchanged ``bookings`` when no usable factor exists.
    """
    # Restrict the search to factors for this row's booking horizon. Searching
    # across all horizons could pick a day that has no entry for this one, in
    # which case the fallback silently returned the plain bookings.
    same_horizon = increase_df[increase_df['days_before'] == row['days_before']]

    # "On or after" covers the exact-day match as well as the forward fallback.
    nearest_day = find_nearest_forward_match(row['day_of_year'], same_horizon['day_of_year'])
    if nearest_day is None:
        return row['bookings']

    factor = same_horizon.loc[same_horizon['day_of_year'] == nearest_day, 'increase_factor'].iloc[0]

    # NOTE(review): the factor is rounded to 0 decimals before use, so e.g. 0.4
    # becomes 0 and 1.6 becomes 2. This looks unintentionally coarse — confirm
    # whether the prediction, not the factor, was meant to be rounded.
    adjusted_increase = factor.round(0)
    return row['bookings'] * (1 + adjusted_increase)

# Predict visitors for every test row from its current booking count.
df_test['predicted_visitors'] = df_test.apply(perform_statistical_predictions, axis=1)

# Print one line per test row so predictions can be compared with the actual visitors.
for _, record in df_test.iterrows():
    print(
        f"Date:{record['date']} Days before:{record['days_before']} "
        f"Bookings:{record['bookings']} Actual Visitors:{record['visitors']}  "
        f"Prediction:{record['predicted_visitors']}\n"
    )


# Plot test predictions against actual visitors

In [None]:
import matplotlib.pyplot as plt
# Filter test_df on days_before: only predictions made 10 days ahead are plotted.
test_df = df_test[df_test['days_before'].isin([10])].copy()

# Plot against the dates (in chronological order) rather than the row index,
# so the x-axis matches its 'Datum' label.
plot_df = test_df.sort_values('date')

fig, ax = plt.subplots(figsize=(15, 7))
ax.plot(plot_df['date'], plot_df['predicted_visitors'], label='Prediktioner')
ax.plot(plot_df['date'], plot_df['visitors'], label='Riktiga Värden')
# The data shown is the statistical model evaluated on the 2024 test split.
ax.set_title('Statistisk modell - test 2024')
ax.set_xlabel('Datum')
ax.set_ylabel('Besökare')
ax.legend()
plt.show()

# Optional: uncomment and run the cell below to merge the median increase factors into your training data

In [None]:
# Optional step, disabled by default. When uncommented it left-joins the median
# increase factors from 'daily_increases.csv' onto the training data on
# (day_of_year, days_before), stores them in a 'bookings_increase' column that
# replaces the existing one, and writes the merged frame to disk.
# NOTE(review): the output path 'merged_data' has no file extension — confirm
# whether 'merged_data.csv' was intended.
# # your training data file 
# df = pd.read_csv('data_for_1_model_all_year_bookings.csv')
# df['day_of_year'] = pd.to_datetime(df['date']).dt.day_of_year
# df = df.drop(columns=['bookings_increase'])
# #statistical increase data
# df_increase = pd.read_csv('daily_increases.csv')

# df_merged = pd.merge(df, df_increase[['day_of_year', 'days_before','increase_factor']], on=['day_of_year', 'days_before'], how='left')
# df_merged = df_merged.rename(columns={'increase_factor': 'bookings_increase'})

# print(df_merged)
# df_merged.to_csv('merged_data')