
# Implement dynamic pricing strategies for fitness classes based on demand, time, and location

In [138]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [139]:
df1 = pd.read_csv('Classes April-May 2018.csv')
df2 = pd.read_csv('Classes June 2018.csv')
print(df1.head())  
print(df2.head()) 

  ActivitySiteID      ActivityDescription  \
0            HXP  20-20-20  2.45pm-3.45pm   
1            HXP  20-20-20  2.45pm-3.45pm   
2            HXP  20-20-20  2.45pm-3.45pm   
3            HXP  20-20-20  2.45pm-3.45pm   
4            HXP  20-20-20  2.45pm-3.45pm   

  BookingEndDateTime (Month / Day / Year) BookingStartTime  MaxBookees  \
0                               08-Apr-18         14:45:00          25   
1                               15-Apr-18         14:45:00          25   
2                               22-Apr-18         14:45:00          25   
3                               29-Apr-18         14:45:00          25   
4                               06-May-18         14:45:00          25   

   Number Booked  Price (INR)  
0             12        499.0  
1             15        499.0  
2             14        499.0  
3              9        499.0  
4              7        499.0  
  ActivitySiteID     ActivityDescription  \
0            BRP  20:20:20  9.30-10.30am   
1   

In [140]:
df = pd.concat([df1, df2], ignore_index=True)
df

Unnamed: 0,ActivitySiteID,ActivityDescription,BookingEndDateTime (Month / Day / Year),BookingStartTime,MaxBookees,Number Booked,Price (INR)
0,HXP,20-20-20 2.45pm-3.45pm,08-Apr-18,14:45:00,25,12,499.0
1,HXP,20-20-20 2.45pm-3.45pm,15-Apr-18,14:45:00,25,15,499.0
2,HXP,20-20-20 2.45pm-3.45pm,22-Apr-18,14:45:00,25,14,499.0
3,HXP,20-20-20 2.45pm-3.45pm,29-Apr-18,14:45:00,25,9,499.0
4,HXP,20-20-20 2.45pm-3.45pm,06-May-18,14:45:00,25,7,499.0
...,...,...,...,...,...,...,...
3284,TSC,Zumba 6.30-7.30pm,18-Jun-18,18:30:00,18,9,1299.0
3285,TSC,Zumba 6.30-7.30pm,25-Jun-18,18:30:00,18,9,1299.0
3286,TSC,Zumba - 6-7pm,07-Jun-18,18:00:00,50,16,1299.0
3287,TSC,Zumba - 6-7pm,14-Jun-18,18:00:00,50,13,1299.0


In [141]:
df = df.drop_duplicates()
df

Unnamed: 0,ActivitySiteID,ActivityDescription,BookingEndDateTime (Month / Day / Year),BookingStartTime,MaxBookees,Number Booked,Price (INR)
0,HXP,20-20-20 2.45pm-3.45pm,08-Apr-18,14:45:00,25,12,499.0
1,HXP,20-20-20 2.45pm-3.45pm,15-Apr-18,14:45:00,25,15,499.0
2,HXP,20-20-20 2.45pm-3.45pm,22-Apr-18,14:45:00,25,14,499.0
3,HXP,20-20-20 2.45pm-3.45pm,29-Apr-18,14:45:00,25,9,499.0
4,HXP,20-20-20 2.45pm-3.45pm,06-May-18,14:45:00,25,7,499.0
...,...,...,...,...,...,...,...
3284,TSC,Zumba 6.30-7.30pm,18-Jun-18,18:30:00,18,9,1299.0
3285,TSC,Zumba 6.30-7.30pm,25-Jun-18,18:30:00,18,9,1299.0
3286,TSC,Zumba - 6-7pm,07-Jun-18,18:00:00,50,16,1299.0
3287,TSC,Zumba - 6-7pm,14-Jun-18,18:00:00,50,13,1299.0


In [142]:
df = df.dropna()
df

Unnamed: 0,ActivitySiteID,ActivityDescription,BookingEndDateTime (Month / Day / Year),BookingStartTime,MaxBookees,Number Booked,Price (INR)
0,HXP,20-20-20 2.45pm-3.45pm,08-Apr-18,14:45:00,25,12,499.0
1,HXP,20-20-20 2.45pm-3.45pm,15-Apr-18,14:45:00,25,15,499.0
2,HXP,20-20-20 2.45pm-3.45pm,22-Apr-18,14:45:00,25,14,499.0
3,HXP,20-20-20 2.45pm-3.45pm,29-Apr-18,14:45:00,25,9,499.0
4,HXP,20-20-20 2.45pm-3.45pm,06-May-18,14:45:00,25,7,499.0
...,...,...,...,...,...,...,...
3284,TSC,Zumba 6.30-7.30pm,18-Jun-18,18:30:00,18,9,1299.0
3285,TSC,Zumba 6.30-7.30pm,25-Jun-18,18:30:00,18,9,1299.0
3286,TSC,Zumba - 6-7pm,07-Jun-18,18:00:00,50,16,1299.0
3287,TSC,Zumba - 6-7pm,14-Jun-18,18:00:00,50,13,1299.0


In [143]:
print(df.columns.tolist()) 

['ActivitySiteID', 'ActivityDescription', 'BookingEndDateTime (Month / Day / Year)', 'BookingStartTime', 'MaxBookees', 'Number Booked', 'Price (INR)']


In [144]:
if 'BookingEndDateTime (Month / Day / Year)' in df.columns:
    df.loc[:, 'BookingEndDateTime'] = pd.to_datetime(
        df['BookingEndDateTime (Month / Day / Year)'],
        format='%m/%d/%Y %H:%M:%S',  # adjust as needed
        errors='coerce'
    )
    df.drop(columns=['BookingEndDateTime (Month / Day / Year)'], inplace=True)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'BookingEndDateTime'] = pd.to_datetime(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['BookingEndDateTime (Month / Day / Year)'], inplace=True)


Unnamed: 0,ActivitySiteID,ActivityDescription,BookingStartTime,MaxBookees,Number Booked,Price (INR),BookingEndDateTime
0,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,12,499.0,NaT
1,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,15,499.0,NaT
2,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,14,499.0,NaT
3,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,9,499.0,NaT
4,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,7,499.0,NaT
...,...,...,...,...,...,...,...
3284,TSC,Zumba 6.30-7.30pm,18:30:00,18,9,1299.0,NaT
3285,TSC,Zumba 6.30-7.30pm,18:30:00,18,9,1299.0,NaT
3286,TSC,Zumba - 6-7pm,18:00:00,50,16,1299.0,NaT
3287,TSC,Zumba - 6-7pm,18:00:00,50,13,1299.0,NaT


In [145]:
df['MaxBookees'] = pd.to_numeric(df['MaxBookees'], errors='coerce')
df['Number Booked'] = pd.to_numeric(df['Number Booked'], errors='coerce')
df['Price (INR)'] = pd.to_numeric(df['Price (INR)'], errors='coerce')
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['MaxBookees'] = pd.to_numeric(df['MaxBookees'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Number Booked'] = pd.to_numeric(df['Number Booked'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Price (INR)'] = pd.to_numeric(df['Price (INR)'], errors='co

Unnamed: 0,ActivitySiteID,ActivityDescription,BookingStartTime,MaxBookees,Number Booked,Price (INR),BookingEndDateTime
0,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,12,499.0,NaT
1,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,15,499.0,NaT
2,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,14,499.0,NaT
3,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,9,499.0,NaT
4,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,7,499.0,NaT
...,...,...,...,...,...,...,...
3284,TSC,Zumba 6.30-7.30pm,18:30:00,18,9,1299.0,NaT
3285,TSC,Zumba 6.30-7.30pm,18:30:00,18,9,1299.0,NaT
3286,TSC,Zumba - 6-7pm,18:00:00,50,16,1299.0,NaT
3287,TSC,Zumba - 6-7pm,18:00:00,50,13,1299.0,NaT


In [146]:
df['BookingStartTime'] = pd.to_datetime(df['BookingStartTime'], errors='coerce').dt.time
df


  df['BookingStartTime'] = pd.to_datetime(df['BookingStartTime'], errors='coerce').dt.time
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['BookingStartTime'] = pd.to_datetime(df['BookingStartTime'], errors='coerce').dt.time


Unnamed: 0,ActivitySiteID,ActivityDescription,BookingStartTime,MaxBookees,Number Booked,Price (INR),BookingEndDateTime
0,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,12,499.0,NaT
1,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,15,499.0,NaT
2,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,14,499.0,NaT
3,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,9,499.0,NaT
4,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,7,499.0,NaT
...,...,...,...,...,...,...,...
3284,TSC,Zumba 6.30-7.30pm,18:30:00,18,9,1299.0,NaT
3285,TSC,Zumba 6.30-7.30pm,18:30:00,18,9,1299.0,NaT
3286,TSC,Zumba - 6-7pm,18:00:00,50,16,1299.0,NaT
3287,TSC,Zumba - 6-7pm,18:00:00,50,13,1299.0,NaT


In [147]:
scaler = StandardScaler()
df[['MaxBookees_scaled', 'Number Booked_scaled', 'Price_scaled']] = scaler.fit_transform(
    df[['MaxBookees', 'Number Booked', 'Price (INR)']]
)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['MaxBookees_scaled', 'Number Booked_scaled', 'Price_scaled']] = scaler.fit_transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['MaxBookees_scaled', 'Number Booked_scaled', 'Price_scaled']] = scaler.fit_transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['MaxBookees_scaled', 

Unnamed: 0,ActivitySiteID,ActivityDescription,BookingStartTime,MaxBookees,Number Booked,Price (INR),BookingEndDateTime,MaxBookees_scaled,Number Booked_scaled,Price_scaled
0,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,12,499.0,NaT,-0.477833,-0.538786,-1.708124
1,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,15,499.0,NaT,-0.477833,-0.223458,-1.708124
2,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,14,499.0,NaT,-0.477833,-0.328567,-1.708124
3,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,9,499.0,NaT,-0.477833,-0.854114,-1.708124
4,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,7,499.0,NaT,-0.477833,-1.064333,-1.708124
...,...,...,...,...,...,...,...,...,...,...
3284,TSC,Zumba 6.30-7.30pm,18:30:00,18,9,1299.0,NaT,-0.940824,-0.854114,-0.698680
3285,TSC,Zumba 6.30-7.30pm,18:30:00,18,9,1299.0,NaT,-0.940824,-0.854114,-0.698680
3286,TSC,Zumba - 6-7pm,18:00:00,50,16,1299.0,NaT,1.175706,-0.118348,-0.698680
3287,TSC,Zumba - 6-7pm,18:00:00,50,13,1299.0,NaT,1.175706,-0.433677,-0.698680


In [148]:
quality_report = {
    "Total Rows": len(df),
    "Missing Values": df.isnull().sum().to_dict(),
    "Data Types": df.dtypes.to_dict(),
    "Duplicate Entries Removed": len(df1) + len(df2) - len(df)
}

# Display the cleaned dataset head and quality report
df_cleaned_head = df.head()

# Define the file path first
df_cleaned_path = "cleaned_dynamic_pricing_data.csv"  # Save in current directory

# Now save the file
df.to_csv(df_cleaned_path, index=False)

# Optional display output
print(" Data saved to:", df_cleaned_path)
print("\n Quality Report:\n", quality_report)
df_cleaned_head


 Data saved to: cleaned_dynamic_pricing_data.csv

 Quality Report:
 {'Total Rows': 3271, 'Missing Values': {'ActivitySiteID': 0, 'ActivityDescription': 0, 'BookingStartTime': 0, 'MaxBookees': 0, 'Number Booked': 0, 'Price (INR)': 0, 'BookingEndDateTime': 3271, 'MaxBookees_scaled': 0, 'Number Booked_scaled': 0, 'Price_scaled': 0}, 'Data Types': {'ActivitySiteID': dtype('O'), 'ActivityDescription': dtype('O'), 'BookingStartTime': dtype('O'), 'MaxBookees': dtype('int64'), 'Number Booked': dtype('int64'), 'Price (INR)': dtype('float64'), 'BookingEndDateTime': dtype('<M8[ns]'), 'MaxBookees_scaled': dtype('float64'), 'Number Booked_scaled': dtype('float64'), 'Price_scaled': dtype('float64')}, 'Duplicate Entries Removed': 18}


Unnamed: 0,ActivitySiteID,ActivityDescription,BookingStartTime,MaxBookees,Number Booked,Price (INR),BookingEndDateTime,MaxBookees_scaled,Number Booked_scaled,Price_scaled
0,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,12,499.0,NaT,-0.477833,-0.538786,-1.708124
1,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,15,499.0,NaT,-0.477833,-0.223458,-1.708124
2,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,14,499.0,NaT,-0.477833,-0.328567,-1.708124
3,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,9,499.0,NaT,-0.477833,-0.854114,-1.708124
4,HXP,20-20-20 2.45pm-3.45pm,14:45:00,25,7,499.0,NaT,-0.477833,-1.064333,-1.708124
