In [None]:
from pathlib import Path

import pandas as pd
import requests

data_url = "https://open.jacksonms.gov/datastore/odata3.0/2fd76604-4378-4e19-9f0f-1bf220825c97?$format=json"

# NOTE(review): verify=False disables TLS certificate verification (not
# recommended for production). It is kept because the original relied on it;
# drop the argument once the endpoint's certificate validates.
# The timeout keeps this cell from hanging forever if the server never answers.
response = requests.get(data_url, verify=False, timeout=30)

# Stop here on an HTTP error (4xx/5xx) instead of failing later while parsing
# an error page as if it were the dataset.
response.raise_for_status()

# The records live under the 'value' key of the JSON payload;
# flatten them into a DataFrame.
data = response.json()
df = pd.json_normalize(data['value'])

# Display the first few rows of the DataFrame
print(df.head())

   _id  Date                       Leasee  \
0    1  2009         Gemini Entertainment   
1    2  2009              W. Kessler, LTD   
2    3  2009         Mississippi Symphony   
3    4  2009  Mississippi Music Educators   
4    5  2009              W. Kessler, LTD   

                                       Name of Event        Place      Type  \
0  R&B Concert featuring Tre' Williams, Calvin Ri...  Thalia Mara   Concert   
1                                            Rave On  Thalia Mara   Concert   
2                                Beethoven, The Hero  Thalia Mara  Symphony   
3                                  Mississippi Sings  Thalia Mara    Chorus   
4                                            Camelot  Thalia Mara   Musical   

     Revenue Attendance  
0  2550.0000       1000  
1  5100.0000       3800  
2  1150.0000       1300  
3   900.0000       4000  
4  5100.0000       4200  




In [16]:
# Remove rows with any NaN values; .copy() gives an independent frame so that
# later column assignments on clean_df do not touch df.
clean_df = df.dropna().copy()

print(clean_df.head())

# Write the cleaned data frame to CSV. to_csv does not create missing
# directories, so make sure the output folder exists first (a no-op when it
# is already there).
Path('data').mkdir(parents=True, exist_ok=True)
clean_df.to_csv('data/clean_data.csv', index=False)

   _id  Date                       Leasee  \
0    1  2009         Gemini Entertainment   
1    2  2009              W. Kessler, LTD   
2    3  2009         Mississippi Symphony   
3    4  2009  Mississippi Music Educators   
4    5  2009              W. Kessler, LTD   

                                       Name of Event        Place      Type  \
0  R&B Concert featuring Tre' Williams, Calvin Ri...  Thalia Mara   Concert   
1                                            Rave On  Thalia Mara   Concert   
2                                Beethoven, The Hero  Thalia Mara  Symphony   
3                                  Mississippi Sings  Thalia Mara    Chorus   
4                                            Camelot  Thalia Mara   Musical   

     Revenue Attendance  
0  2550.0000       1000  
1  5100.0000       3800  
2  1150.0000       1300  
3   900.0000       4000  
4  5100.0000       4200  


In [17]:
# Convert 'Attendance' to a numeric dtype; with errors='coerce' any entry that
# cannot be parsed becomes NaN instead of raising.
attendance_numeric = pd.to_numeric(clean_df['Attendance'], errors='coerce')
clean_df['Attendance'] = attendance_numeric

# Median of the 'Attendance' column (shown as this cell's output)
clean_df['Attendance'].median()

1600.0

In [18]:
# Boolean mask marking the rows whose 'Type' is exactly "Concert"
is_concert = clean_df['Type'] == "Concert"

# Keep only the concert rows and preview them
concert_data = clean_df.loc[is_concert]
print(concert_data.head())

    _id  Date                           Leasee  \
0     1  2009             Gemini Entertainment   
1     2  2009                  W. Kessler, LTD   
14   15  2010           TLS Management Company   
17   18  2010        Charles Heron Enterprises   
43   44  2010  Unstoppable Entertainment, Inc.   

                                        Name of Event        Place     Type  \
0   R&B Concert featuring Tre' Williams, Calvin Ri...  Thalia Mara  Concert   
1                                             Rave On  Thalia Mara  Concert   
14                               Robin Thicke Concert  Thalia Mara  Concert   
17                            I Can't Stop Loving You  Thalia Mara  Concert   
43                             Congregation Gone Wild  Thalia Mara  Concert   

      Revenue  Attendance  
0   2550.0000        1000  
1   5100.0000        3800  
14  1550.0000        2360  
17  5895.7700        2519  
43  1250.0000         800  


In [None]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool

from bokeh.palettes import Set3_10
from bokeh.transform import factor_cmap

# Render Bokeh plots inline in the notebook.
# To save the plot as an HTML file instead, import output_file
# (from bokeh.plotting import output_file) and call:
#     output_file("revenue_barplot.html")
output_notebook()

# Revenue has to be float before it can be summed
revenue_as_float = clean_df['Revenue'].astype(float)
clean_df['Revenue'] = revenue_as_float

# Total revenue for each distinct 'Date' value
revenue_by_date = clean_df.groupby('Date')['Revenue']
grouped = revenue_by_date.sum()

print(grouped)

Date
2009     16050.00
2010     93638.49
2011     92237.32
2012     75251.00
2013     90547.43
2014     67016.00
2015    155918.31
2016    214761.20
2017    130920.18
2018    141346.25
Name: Revenue, dtype: float64


In [None]:
# Wrap the grouped totals in a ColumnDataSource; the 'Date' index is then
# available as a column alongside 'Revenue'.
source = ColumnDataSource(pd.DataFrame(grouped))
dates = source.data['Date'].tolist()

# The list of dates is used as the x-range, one bar position per date.
# NOTE(review): this assumes the Date values are strings (categorical
# factors) - confirm against the loaded data.
p = figure(
    x_range=dates,
    title='Jackson Event Revenue by Year',
    x_axis_label='Year',
    y_axis_label='Revenue',
)

# Map each date to a colour from the Set3_10 palette
color_map = factor_cmap(field_name='Date', palette=Set3_10, factors=dates)

# One vertical bar per date, with height equal to the summed revenue
p.vbar(
    x='Date',
    top='Revenue',
    width=0.70,
    color=color_map,
    source=source,
)

show(p)