In [11]:
import pandas as pd
import plotly.express as px

# Load and preprocess data
cc_data = pd.read_csv("MC2/cc_data.csv", encoding="latin1", parse_dates=['timestamp'])
loyalty_data = pd.read_csv("MC2/loyalty_data.csv", encoding="latin1", parse_dates=['timestamp'])

# Combine datasets with identifiers
combined_data = pd.concat([cc_data.assign(type='Credit Card'), loyalty_data.assign(type='Loyalty Card')])

# Overall location visit counts
location_counts = combined_data['location'].value_counts().reset_index()
location_counts.columns = ['location', 'visits']

# Bar chart for overall visits
fig = px.bar(location_counts, x='location', y='visits', title='Overall Visits per Location')
fig.show()

# Time analysis for top locations
top_locations = location_counts.head(5)['location']
top_data = combined_data[combined_data['location'].isin(top_locations)]
top_data['hour'] = top_data['timestamp'].dt.hour

# Hourly visits
hourly_visits = top_data.groupby(['hour', 'location']).size().reset_index(name='counts')
fig_hourly = px.line(hourly_visits, x='hour', y='counts', color='location', title='Hourly Visits at Top Locations')
fig_hourly.show()

# Heatmap of hourly activity
heatmap_data = top_data.groupby(['hour', 'location']).size().unstack(fill_value=0)
fig_heatmap = px.imshow(heatmap_data, labels=dict(x="Hour of the Day", y="Location", color="Visits"),
                        title="Hourly Activity Heatmap for Top Locations")
fig_heatmap.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [12]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

# Load your data
cc_data = pd.read_csv("MC2/cc_data.csv", encoding="latin1", parse_dates=['timestamp'])
loyalty_data = pd.read_csv("MC2/loyalty_data.csv", encoding="latin1", parse_dates=['timestamp'])
combined_data = pd.concat([cc_data, loyalty_data])

# Initialize the Dash app
app = dash.Dash(__name__)

# App layout
app.layout = html.Div([
    html.Div([
        dcc.DatePickerSingle(
            id='date-picker',
            min_date_allowed=combined_data['timestamp'].min().date(),
            max_date_allowed=combined_data['timestamp'].max().date(),
            initial_visible_month=combined_data['timestamp'].min().date(),
            date=str(combined_data['timestamp'].min().date())
        ),
        dcc.RangeSlider(
            id='time-slider',
            min=0,
            max=23,
            step=1,
            marks={i: f'{i}:00' for i in range(24)},
            value=[0, 23]
        )
    ]),
    dcc.Graph(id='location-bar-chart')
])

# Callback to update the graph based on the date and time range picked
@app.callback(
    Output('location-bar-chart', 'figure'),
    [Input('date-picker', 'date'),
     Input('time-slider', 'value')]
)
def update_chart(selected_date, time_range):
    # Filter data based on the selected date
    filtered_data = combined_data[
        (combined_data['timestamp'].dt.date == pd.to_datetime(selected_date).date()) &
        (combined_data['timestamp'].dt.hour >= time_range[0]) &
        (combined_data['timestamp'].dt.hour <= time_range[1])
    ]
    location_counts = filtered_data['location'].value_counts().reset_index()
    location_counts.columns = ['location', 'visits']
    
    # Create bar chart
    fig = px.bar(location_counts, x='location', y='visits', title='Visits by Location for Selected Date and Time')
    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)


