In [1]:
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns

In [2]:
# Load the mobility dataset from its CSV export into a DataFrame.
data = pd.read_csv(filepath_or_buffer="PostPandemic.pandemic1.csv")

In [3]:
# List the column labels of the loaded DataFrame.
data.columns


Index(['_id', 'Entity', 'Code', 'Day', 'retail_and_recreation',
       'grocery_and_pharmacy', 'residential', 'transit_stations', 'parks',
       'workplaces'],
      dtype='object')

### Visualise the dataset returned from MongoDB

In [6]:
# Preview the first rows of the DataFrame bound to `data`.
data.head()

Unnamed: 0,_id,Entity,Code,Day,retail_and_recreation,grocery_and_pharmacy,residential,transit_stations,parks,workplaces
0,64c5c5da382d3a96f854375b,Angola,AGO,2020-02-17T00:00:00.000Z,-2.333,-2.0,1.0,0.333,-0.667,0.667
1,64c5c5da382d3a96f854375c,Angola,AGO,2020-02-18T00:00:00.000Z,-2.5,-2.5,0.5,0.0,-1.5,1.75
2,64c5c5da382d3a96f854375d,Angola,AGO,2020-02-19T00:00:00.000Z,-4.0,-4.4,0.6,-1.4,-1.8,2.2
3,64c5c5da382d3a96f854375e,Angola,AGO,2020-02-20T00:00:00.000Z,-4.0,-4.5,0.333,-1.667,-1.5,2.5
4,64c5c5da382d3a96f854375f,Angola,AGO,2020-02-21T00:00:00.000Z,-3.714,-4.0,0.286,-1.429,-0.857,2.857


### Visualisation of mobility trends by category

In [7]:
def time_series_multiline(df, timelike_colname, value_colnames, aggregate='mean', figsize=(800, 500), mpl_palette_name='Dark2', title='COVID Mobility Trends by Category'):
    """Plot one aggregated line per value column against a time-like column.

    For every name in ``value_colnames`` the rows of ``df`` are grouped by
    ``timelike_colname`` and reduced with ``aggregate`` (through
    ``get_aggregated_values``); each result is added as a line trace.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data containing ``timelike_colname`` and every value column.
    timelike_colname : str
        Column used for grouping and as the x axis.
    value_colnames : iterable of str
        Columns to aggregate; each becomes one named trace.
    aggregate : str or callable, default 'mean'
        Aggregation forwarded to ``get_aggregated_values``.
    figsize : tuple of (int, int), default (800, 500)
        Figure ``(width, height)`` passed to the layout.
    mpl_palette_name : str, default 'Dark2'
        Palette name handed to ``sns.color_palette``. Its colours are
        assigned to the traces in order and reused cyclically when there
        are more traces than colours.
    title : str, default 'COVID Mobility Trends by Category'
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (not yet shown).
    """
    palette = sns.color_palette(mpl_palette_name).as_hex()
    fig = go.Figure()

    for position, value_colname in enumerate(value_colnames):
        xs, ys = get_aggregated_values(df, timelike_colname, value_colname, aggregate)
        fig.add_trace(go.Scatter(
            x=xs,
            y=ys,
            mode='lines',
            name=value_colname,
            # Apply the requested palette; previously it was computed but unused.
            line=dict(color=palette[position % len(palette)]),
            # The trace name is a trace-level attribute, not a per-point hover
            # variable, so it is written into the template text directly.
            hovertemplate='Date: %{x}<br>Value: %{y}<br>Series: ' + str(value_colname),
        ))

    fig.update_layout(
        title=title,
        xaxis_title=timelike_colname,
        yaxis_title=f'Aggregated {aggregate}',
        showlegend=True,
        width=figsize[0],
        height=figsize[1],
        margin=dict(l=50, r=50, t=50, b=50),
    )

    return fig

def get_aggregated_values(df, timelike_colname, value_colname, aggregate='mean'):
    """Aggregate ``value_colname`` for each distinct ``timelike_colname`` value.

    Returns a pair of Series ``(xs, ys)``: the group keys and the aggregated
    values, both taken from the same re-indexed summary frame.
    """
    per_group = df.groupby(timelike_colname)[value_colname].agg(aggregate)
    summary = per_group.reset_index()
    xs = summary[timelike_colname]
    ys = summary[value_colname]
    return xs, ys


# Columns to plot; adjust the list to compare other mobility categories.
value_colnames = ['retail_and_recreation', 'grocery_and_pharmacy', 'residential', 'transit_stations']

# Parse 'Day' into timestamps and order the rows chronologically. The result
# is rebound to `data` instead of mutating the loaded frame in place.
data = data.assign(Day=pd.to_datetime(data['Day'])).sort_values('Day')
chart = time_series_multiline(data, 'Day', value_colnames, aggregate='mean', figsize=(1000, 500))

# Display the chart
chart.show()


### Visualising the countries with the lowest percentage change in retail and recreation over the given period

In [8]:

def plot_pie_chart_bottom(df, value_colname, n_bottom=5):
    """Donut chart of the entities with the smallest absolute mean value.

    Rows are grouped by 'Entity', ``value_colname`` is averaged per group,
    the absolute value is taken, and the ``n_bottom`` smallest results
    become the slices.

    Returns the plotly Figure without showing it.
    """
    mean_by_entity = df.groupby('Entity')[value_colname].mean()
    smallest = mean_by_entity.abs().nsmallest(n_bottom).reset_index()

    donut = go.Pie(
        labels=smallest['Entity'],
        values=smallest[value_colname],
        hole=0.4,
        textinfo='label+percent',
    )

    fig = go.Figure()
    fig.add_trace(donut)
    fig.update_layout(title=f'Bottom {n_bottom} Countries by {value_colname}', showlegend=True)
    return fig

# Build the donut chart for the five countries with the smallest absolute
# mean change; pass another column name or n_bottom to explore other categories.
bottom_countries_pie_chart = plot_pie_chart_bottom(
    data,
    value_colname='retail_and_recreation',
    n_bottom=5,
)

# Render the figure in the notebook output.
bottom_countries_pie_chart.show()


### Visualising the countries with the highest percentage change in retail and recreation over the given period

In [9]:

def plot_pie_chart(df, value_colname, n_top=5):
    """Donut chart of the entities with the largest absolute mean value.

    Rows are grouped by 'Entity', ``value_colname`` is averaged per group,
    the absolute value is taken, and the ``n_top`` largest results become
    the slices.

    Returns the plotly Figure without showing it.
    """
    mean_by_entity = df.groupby('Entity')[value_colname].mean()
    largest = mean_by_entity.abs().nlargest(n_top).reset_index()

    donut = go.Pie(
        labels=largest['Entity'],
        values=largest[value_colname],
        hole=0.4,
        textinfo='label+percent',
    )

    fig = go.Figure()
    fig.add_trace(donut)
    fig.update_layout(
        title=f'Top {n_top} Countries by percentage change in {value_colname}',
        showlegend=True,
    )
    return fig

def plot_trendline_chart(df, timelike_colname, value_colname, top_countries, figsize=(800, 500)):
    """Draw one line per country showing ``value_colname`` over time.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with an 'Entity' column plus ``timelike_colname`` and
        ``value_colname``.
    timelike_colname : str
        Column plotted on the x axis.
    value_colname : str
        Column plotted on the y axis.
    top_countries : sequence of str
        'Entity' values to plot; one trace is added per item, in order.
    figsize : tuple of (int, int), default (800, 500)
        Figure ``(width, height)`` passed to the layout.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (not yet shown).
    """
    filtered_df = df[df['Entity'].isin(top_countries)]

    fig = go.Figure()

    for country in top_countries:
        country_data = filtered_df[filtered_df['Entity'] == country]
        # Order the points along the x axis here rather than relying on the
        # caller having sorted `df` beforehand; a stable sort leaves already
        # ordered input untouched.
        country_data = country_data.sort_values(timelike_colname, kind='mergesort')
        xs, ys = get_xy_values(country_data, timelike_colname, value_colname)
        fig.add_trace(go.Scatter(x=xs, y=ys, mode='lines', name=country))

    fig.update_layout(
        title=f'Trendline Chart for Top {len(top_countries)} Countries by percentage change in {value_colname}',
        xaxis_title=timelike_colname,
        yaxis_title=value_colname,
        showlegend=True,
        width=figsize[0],
        height=figsize[1],
        margin=dict(l=50, r=50, t=50, b=50),
    )

    return fig

def get_xy_values(df, timelike_colname, value_colname):
    """Return the ``(x, y)`` column pair of ``df`` selected by the two names."""
    return df[timelike_colname], df[value_colname]

# Rank countries by retail_and_recreation and build the top-five donut chart;
# change the column name or n_top to study another category.
top_countries_pie_chart = plot_pie_chart(data, value_colname='retail_and_recreation', n_top=5)

# The donut's slice labels are reused as the countries for the trend lines.
top_countries = top_countries_pie_chart.data[0].labels
trendline_chart = plot_trendline_chart(
    data,
    timelike_colname='Day',
    value_colname='retail_and_recreation',
    top_countries=top_countries,
)

# Render both figures: donut first, then the trend lines.
top_countries_pie_chart.show()
trendline_chart.show()


### Country-specific data visualisation

### Mobility data visualisation for Angola by category: retail_and_recreation, grocery_and_pharmacy, residential, transit_stations




In [27]:
# Load the Angola export. NOTE: `data` is rebound here, replacing the frame
# loaded earlier in the notebook.
data = data_angola = pd.read_csv(filepath_or_buffer="AngolaPostPandemic.pandemic.csv")
data.columns

Index(['_id', 'Entity', 'Code', 'Day', 'retail_and_recreation',
       'grocery_and_pharmacy', 'residential', 'transit_stations', 'parks',
       'workplaces'],
      dtype='object')

In [28]:
# Preview the first five rows of the frame currently bound to `data`.
data.head(5)

Unnamed: 0,_id,Entity,Code,Day,retail_and_recreation,grocery_and_pharmacy,residential,transit_stations,parks,workplaces
0,64c5c5da382d3a96f854375b,Angola,AGO,2020-02-17T00:00:00.000Z,-2.333,-2.0,1.0,0.333,-0.667,0.667
1,64c5c5da382d3a96f854375c,Angola,AGO,2020-02-18T00:00:00.000Z,-2.5,-2.5,0.5,0.0,-1.5,1.75
2,64c5c5da382d3a96f854375d,Angola,AGO,2020-02-19T00:00:00.000Z,-4.0,-4.4,0.6,-1.4,-1.8,2.2
3,64c5c5da382d3a96f854375e,Angola,AGO,2020-02-20T00:00:00.000Z,-4.0,-4.5,0.333,-1.667,-1.5,2.5
4,64c5c5da382d3a96f854375f,Angola,AGO,2020-02-21T00:00:00.000Z,-3.714,-4.0,0.286,-1.429,-0.857,2.857


In [29]:

def time_series_multiline(df, timelike_colname, value_colnames, countries=None, aggregate='mean', figsize=(800, 500), mpl_palette_name='Dark2', title='Time Series Chart for Angola'):
    """Plot one aggregated line per value column, optionally for selected countries.

    NOTE: this re-defines the function of the same name declared earlier in
    the notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain an 'Entity' column when ``countries`` is given.
    timelike_colname : str
        Column used for grouping and as the x axis.
    value_colnames : iterable of str
        Columns to aggregate; each becomes one named trace.
    countries : str or iterable of str, optional
        'Entity' values to keep. ``None`` or an empty collection means no
        filtering; a single string is treated as a one-item list.
    aggregate : str or callable, default 'mean'
        Aggregation forwarded to ``get_aggregated_values``.
    figsize : tuple of (int, int), default (800, 500)
        Figure ``(width, height)`` passed to the layout.
    mpl_palette_name : str, default 'Dark2'
        Palette name handed to ``sns.color_palette``; colours are assigned
        to the traces in order and reused cyclically.
    title : str, default 'Time Series Chart for Angola'
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (not yet shown).
    """
    palette = sns.color_palette(mpl_palette_name).as_hex()
    fig = go.Figure()

    if countries is not None:
        # Normalise to a list so a bare string or an array-like works; the
        # truth value of an array is ambiguous and a string is rejected by isin.
        countries = [countries] if isinstance(countries, str) else list(countries)
        if countries:
            df = df[df['Entity'].isin(countries)]

    for position, value_colname in enumerate(value_colnames):
        xs, ys = get_aggregated_values(df, timelike_colname, value_colname, aggregate)
        fig.add_trace(go.Scatter(
            x=xs,
            y=ys,
            mode='lines',
            name=value_colname,
            # Apply the requested palette; previously it was computed but unused.
            line=dict(color=palette[position % len(palette)]),
        ))

    fig.update_layout(
        title=title,
        xaxis_title=timelike_colname,
        yaxis_title=f'Aggregated {aggregate}',
        showlegend=True,
        width=figsize[0],
        height=figsize[1],
        margin=dict(l=50, r=50, t=50, b=50),
    )

    return fig

def get_aggregated_values(df, timelike_colname, value_colname, aggregate='mean'):
    """Aggregate ``value_colname`` for each distinct ``timelike_colname`` value.

    Returns a pair of Series ``(xs, ys)``: the group keys and the aggregated
    values. NOTE: re-defines the identical helper declared earlier in the
    notebook.
    """
    per_group = df.groupby(timelike_colname)[value_colname].agg(aggregate)
    summary = per_group.reset_index()
    xs = summary[timelike_colname]
    ys = summary[value_colname]
    return xs, ys

# Modify the arguments according to your DataFrame columns and the country to plot
value_colnames = ['retail_and_recreation', 'grocery_and_pharmacy', 'residential', 'transit_stations']
# Only Angola is selected so the filter agrees with the chart title.
specific_countries = ['Angola']
# Parse 'Day' into timestamps and sort chronologically; the result is rebound
# to `data` instead of mutating the loaded frame in place.
data = data.assign(Day=pd.to_datetime(data['Day'])).sort_values('Day')
chart = time_series_multiline(data, 'Day', value_colnames, countries=specific_countries, aggregate='mean', figsize=(1000, 500))

# Display the chart
chart.show()


### Mobility data visualisation for Canada by category: retail_and_recreation, grocery_and_pharmacy, residential, transit_stations

In [30]:
# Load the Canada export. NOTE: `data` is rebound here, replacing the frame
# used by the previous section.
data = data_canada = pd.read_csv(filepath_or_buffer="CAN_PostPandemic.pandemic.csv")
data.columns

Index(['_id', 'Entity', 'Code', 'Day', 'retail_and_recreation',
       'grocery_and_pharmacy', 'residential', 'transit_stations', 'parks',
       'workplaces'],
      dtype='object')

In [31]:
# Preview the first five rows of the frame currently bound to `data`.
data.head(5)

Unnamed: 0,_id,Entity,Code,Day,retail_and_recreation,grocery_and_pharmacy,residential,transit_stations,parks,workplaces
0,64c5c5de382d3a96f8548add,Canada,CAN,2020-02-17T00:00:00.000Z,1.667,-1.667,3.0,-7.0,38.0,-17.0
1,64c5c5de382d3a96f8548ade,Canada,CAN,2020-02-18T00:00:00.000Z,1.0,-0.25,2.5,-5.5,30.0,-13.0
2,64c5c5de382d3a96f8548adf,Canada,CAN,2020-02-19T00:00:00.000Z,1.0,0.0,2.0,-4.4,25.8,-10.4
3,64c5c5de382d3a96f8548ae0,Canada,CAN,2020-02-20T00:00:00.000Z,1.833,0.667,1.667,-3.833,23.833,-8.5
4,64c5c5de382d3a96f8548ae1,Canada,CAN,2020-02-21T00:00:00.000Z,1.857,0.429,1.571,-3.429,21.857,-7.714


In [32]:
def time_series_multiline(df, timelike_colname, value_colnames, countries=None, aggregate='mean', figsize=(800, 500), mpl_palette_name='Dark2', title='Time Series Chart for Canada'):
    """Plot one aggregated line per value column, optionally for selected countries.

    NOTE: this re-defines the function of the same name declared earlier in
    the notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain an 'Entity' column when ``countries`` is given.
    timelike_colname : str
        Column used for grouping and as the x axis.
    value_colnames : iterable of str
        Columns to aggregate; each becomes one named trace.
    countries : str or iterable of str, optional
        'Entity' values to keep. ``None`` or an empty collection means no
        filtering; a single string is treated as a one-item list.
    aggregate : str or callable, default 'mean'
        Aggregation forwarded to ``get_aggregated_values``.
    figsize : tuple of (int, int), default (800, 500)
        Figure ``(width, height)`` passed to the layout.
    mpl_palette_name : str, default 'Dark2'
        Palette name handed to ``sns.color_palette``; colours are assigned
        to the traces in order and reused cyclically.
    title : str, default 'Time Series Chart for Canada'
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (not yet shown).
    """
    palette = sns.color_palette(mpl_palette_name).as_hex()
    fig = go.Figure()

    if countries is not None:
        # Normalise to a list so a bare string or an array-like works; the
        # truth value of an array is ambiguous and a string is rejected by isin.
        countries = [countries] if isinstance(countries, str) else list(countries)
        if countries:
            df = df[df['Entity'].isin(countries)]

    for position, value_colname in enumerate(value_colnames):
        xs, ys = get_aggregated_values(df, timelike_colname, value_colname, aggregate)
        fig.add_trace(go.Scatter(
            x=xs,
            y=ys,
            mode='lines',
            name=value_colname,
            # Apply the requested palette; previously it was computed but unused.
            line=dict(color=palette[position % len(palette)]),
        ))

    fig.update_layout(
        title=title,
        xaxis_title=timelike_colname,
        yaxis_title=f'Aggregated {aggregate}',
        showlegend=True,
        width=figsize[0],
        height=figsize[1],
        margin=dict(l=50, r=50, t=50, b=50),
    )

    return fig

def get_aggregated_values(df, timelike_colname, value_colname, aggregate='mean'):
    """Aggregate ``value_colname`` for each distinct ``timelike_colname`` value.

    Returns a pair of Series ``(xs, ys)``: the group keys and the aggregated
    values. NOTE: re-defines the identical helper declared earlier in the
    notebook.
    """
    per_group = df.groupby(timelike_colname)[value_colname].agg(aggregate)
    summary = per_group.reset_index()
    xs = summary[timelike_colname]
    ys = summary[value_colname]
    return xs, ys

# Arguments can be modified according to the categories in the data
value_colnames = ['retail_and_recreation', 'grocery_and_pharmacy', 'residential', 'transit_stations']
# Only Canada is selected so the filter agrees with the chart title.
specific_countries = ['Canada']
# Parse 'Day' into timestamps and sort chronologically; the result is rebound
# to `data` instead of mutating the loaded frame in place.
data = data.assign(Day=pd.to_datetime(data['Day'])).sort_values('Day')
chart = time_series_multiline(data, 'Day', value_colnames, countries=specific_countries, aggregate='mean', figsize=(1000, 500))

# Display the chart
chart.show()


### Mobility data visualisation for India by category: retail_and_recreation, grocery_and_pharmacy, residential, transit_stations

In [33]:
# Load the India export. NOTE: `data` is rebound here, replacing the frame
# used by the previous section.
data = data_india = pd.read_csv(filepath_or_buffer="IND_PostPandemic.pandemic.csv")
data.columns

Index(['_id', 'Entity', 'Code', 'Day', 'retail_and_recreation',
       'grocery_and_pharmacy', 'residential', 'transit_stations', 'parks',
       'workplaces'],
      dtype='object')

In [34]:

# Preview the first five rows of the frame currently bound to `data`.
data.head(5)

Unnamed: 0,_id,Entity,Code,Day,retail_and_recreation,grocery_and_pharmacy,residential,transit_stations,parks,workplaces
0,64c5c5e3382d3a96f854f4e4,India,IND,2020-02-17T00:00:00.000Z,0.667,1.667,0.0,2.0,3.0,3.0
1,64c5c5e3382d3a96f854f4e5,India,IND,2020-02-18T00:00:00.000Z,0.5,1.75,0.0,2.0,3.25,3.0
2,64c5c5e3382d3a96f854f4e6,India,IND,2020-02-19T00:00:00.000Z,0.4,1.8,0.2,1.8,2.8,3.2
3,64c5c5e3382d3a96f854f4e7,India,IND,2020-02-20T00:00:00.000Z,0.5,2.0,0.0,2.333,3.167,3.333
4,64c5c5e3382d3a96f854f4e8,India,IND,2020-02-21T00:00:00.000Z,-0.143,1.714,0.714,1.429,3.571,0.143


In [35]:
def time_series_multiline(df, timelike_colname, value_colnames, countries=None, aggregate='mean', figsize=(800, 500), mpl_palette_name='Dark2', title='Time Series Chart for India'):
    """Plot one aggregated line per value column, optionally for selected countries.

    NOTE: this re-defines the function of the same name declared earlier in
    the notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain an 'Entity' column when ``countries`` is given.
    timelike_colname : str
        Column used for grouping and as the x axis.
    value_colnames : iterable of str
        Columns to aggregate; each becomes one named trace.
    countries : str or iterable of str, optional
        'Entity' values to keep. ``None`` or an empty collection means no
        filtering; a single string is treated as a one-item list.
    aggregate : str or callable, default 'mean'
        Aggregation forwarded to ``get_aggregated_values``.
    figsize : tuple of (int, int), default (800, 500)
        Figure ``(width, height)`` passed to the layout.
    mpl_palette_name : str, default 'Dark2'
        Palette name handed to ``sns.color_palette``; colours are assigned
        to the traces in order and reused cyclically.
    title : str, default 'Time Series Chart for India'
        Figure title (the default corrects the earlier 'INdia' typo).

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (not yet shown).
    """
    palette = sns.color_palette(mpl_palette_name).as_hex()
    fig = go.Figure()

    if countries is not None:
        # Normalise to a list so a bare string or an array-like works; the
        # truth value of an array is ambiguous and a string is rejected by isin.
        countries = [countries] if isinstance(countries, str) else list(countries)
        if countries:
            df = df[df['Entity'].isin(countries)]

    for position, value_colname in enumerate(value_colnames):
        xs, ys = get_aggregated_values(df, timelike_colname, value_colname, aggregate)
        fig.add_trace(go.Scatter(
            x=xs,
            y=ys,
            mode='lines',
            name=value_colname,
            # Apply the requested palette; previously it was computed but unused.
            line=dict(color=palette[position % len(palette)]),
        ))

    fig.update_layout(
        title=title,
        xaxis_title=timelike_colname,
        yaxis_title=f'Aggregated {aggregate}',
        showlegend=True,
        width=figsize[0],
        height=figsize[1],
        margin=dict(l=50, r=50, t=50, b=50),
    )

    return fig

def get_aggregated_values(df, timelike_colname, value_colname, aggregate='mean'):
    """Aggregate ``value_colname`` for each distinct ``timelike_colname`` value.

    Returns a pair of Series ``(xs, ys)``: the group keys and the aggregated
    values. NOTE: re-defines the identical helper declared earlier in the
    notebook.
    """
    per_group = df.groupby(timelike_colname)[value_colname].agg(aggregate)
    summary = per_group.reset_index()
    xs = summary[timelike_colname]
    ys = summary[value_colname]
    return xs, ys


# Arguments can be modified according to the categories in the data
value_colnames = ['retail_and_recreation', 'grocery_and_pharmacy', 'residential', 'transit_stations']
# Only India is selected so the filter agrees with the chart title.
specific_countries = ['India']
# Parse 'Day' into timestamps and sort chronologically; the result is rebound
# to `data` instead of mutating the loaded frame in place.
data = data.assign(Day=pd.to_datetime(data['Day'])).sort_values('Day')
chart = time_series_multiline(data, 'Day', value_colnames, countries=specific_countries, aggregate='mean', figsize=(1000, 500))

# Display the chart
chart.show()
