Add your import statements and the database connection statements in the code block below.

Database file path: /course/data/CSE-578/dinofunworld.db

In [0]:
# your code here
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Open a single SQLite connection to the DinoFunWorld database file.
# The connection object is kept in `conn` so later cells can query through it.
db_path = '/course/data/CSE-578/dinofunworld.db'
conn = sqlite3.connect(database=db_path)

### **Question 1**:
Create and display a control chart showing attendance at the ride 'Atmosfear' over the data provided. In the control chart, display the attendance, the mean attendance, and bands for one and two standard deviations away from the average.

**Note:** 
* For this question, display the control chart in the notebook and print the mean attendance and standard deviation values as a list. 
For example: [mean, standard_deviation]
* Create a single Control chart that displays the mean, standard deviation, and attendance, adhering to the specified line style and color as outlined below.
    * Mean: 'g-'
    * +2 STD and -2 STD: 'r-'
    * +STD and -STD: 'y-'
    * Attendance: 'b-'
* The control chart should also have the parameters listed below:
    * a legend that displays the mean and standard deviation using the specified line styles and colors. Note: the legend should be placed outside the plot.
    * title named '**Control Chart of attendance at Atmosfear**'
    * x-label ('**Time in minutes**') and y-label ('**Attendance**')
* The plot must not do any of the following:
    * set the figure size
    * set the font size or font weight for titles and labels


In [0]:
### TEST FUNCTION: test_question1
# DO NOT REMOVE OR MODIFY THE ABOVE LINE
# your code here

# Look up the AttractionID stored for the ride named 'Atmosfear'.
query_attraction = """
SELECT AttractionID FROM attraction WHERE Name = 'Atmosfear'
"""
atmosfear_id = pd.read_sql(query_attraction, conn)['AttractionID'].iloc[0]

# Query to get check-in sequences containing 'Atmosfear'.
# The LIKE pattern is only a coarse substring pre-filter (it can also match
# longer ids that contain these digits); the exact match happens below.
query_sequences = f"""
SELECT visitorID, sequence FROM sequences WHERE sequence LIKE '%{atmosfear_id}%';
"""
sequences = pd.read_sql(query_sequences, conn)
sequence_stats = sequences.copy()

# Convert each '-'-separated sequence into a 0/1 list: 1 where the entry is
# exactly the Atmosfear id, 0 otherwise. The id is stringified once up front
# instead of once per element.
atmosfear_token = str(atmosfear_id)
sequence_stats['sequence_list'] = sequence_stats['sequence'].apply(
    lambda s: [1 if x == atmosfear_token else 0 for x in s.split("-")]
)

# Compute attendance: sum the 0/1 lists position by position across visitors.
# NOTE(review): assumes every sequence has the same number of entries -- confirm.
attendance = np.sum(sequence_stats['sequence_list'].values.tolist(), axis=0)
mean = np.nanmean(attendance)
std = np.nanstd(attendance)

# Plot control chart.
# Consecutive samples are spaced 5 units apart on the axis labelled in minutes.
# No plt.figure(figsize=...) call: the assignment forbids setting a figure size.
x_axis_list = range(0, len(attendance)*5, 5)
n_samples = len(attendance)
plt.plot(x_axis_list, [mean+2*std]*n_samples, 'r-', label='+2 Std Dev')
plt.plot(x_axis_list, [mean-2*std]*n_samples, 'r-', label='-2 Std Dev')
plt.plot(x_axis_list, [mean+std]*n_samples, 'y-', label='+1 Std Dev')
plt.plot(x_axis_list, [mean-std]*n_samples, 'y-', label='-1 Std Dev')
plt.plot(x_axis_list, [mean]*n_samples, 'g-', label='Mean')
plt.plot(x_axis_list, attendance, 'b-', label='Attendance')
plt.ylabel('Attendance')
plt.xlabel('Time in minutes')
plt.title('Control Chart of attendance at Atmosfear')
# Anchor the legend's left edge just past the right edge of the axes so it
# sits outside the plot area, as the assignment requires.
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()

# Print mean and standard deviation in required format: [mean, standard_deviation]
print([float(mean), float(std)])

### **Question 2**:
Create and display a moving average chart of the attendance at 'Atmosfear' over the data provided. Use a window size of 50 for this moving average.

**Note:** 
* The Moving Average Chart should have the parameters listed below:
    * set mode to 'same' while performing convolution
    * title named '**Moving Average Chart of attendance at Atmosfear**'
    * x-label ('**Time in minutes**') and y-label ('**Attendance**')
* The plot must not include any of the following:
    * a legend
    * a custom figure size
    * a custom font size or font weight for titles and labels


In [0]:
### TEST FUNCTION: test_question2
# DO NOT REMOVE OR MODIFY THE ABOVE LINE
# your code here

# 50-sample moving average of the attendance series computed in Question 1.
# mode='same' (required by the assignment) makes the output as long as the
# longer of the two inputs, so for a series of at least 50 samples every
# attendance sample gets a smoothed value at the same time position.
# mode='valid' would instead drop 49 samples and shift the curve on the axis.
window_size = 50
moving_avg = np.convolve(attendance, np.ones(window_size)/window_size, mode='same')

# Plot moving average.
# Same spacing as the control chart: 5 units per sample on the minutes axis.
# No figure size and no legend are set, per the assignment; only the moving
# average is drawn, since a second unlabelled curve could not be identified
# without a legend.
x_axis_ma = range(0, len(moving_avg)*5, 5)
plt.plot(x_axis_ma, moving_avg)
plt.ylabel('Attendance')
plt.xlabel('Time in minutes')
plt.title('Moving Average Chart of attendance at Atmosfear')
plt.show()

### **Question 3**:
Create and display an exponentially-weighted moving average chart of the attendance at 'Atmosfear' over the data provided. Again, use a window size of 50 for this weighted moving average.

**Note:** 
* The Exponentially Weighted Moving Average Chart should have the parameters listed below:
    * title named '**Exponentially Weighted Moving Average Chart of the attendance at Atmosfear**'  
    * x-label ('**Time in minutes**') and y-label ('**Attendance**')
* The plot must not include any of the following:
    * a legend
    * a custom figure size
    * a custom font size or font weight for titles and labels


In [0]:
### TEST FUNCTION: test_question3
# DO NOT REMOVE OR MODIFY THE ABOVE LINE
# your code here

# Compute the exponentially weighted moving average of the attendance series
# from Question 1 with span=50. adjust=False selects the recursive form of the
# weighting, so the first output value equals the first attendance value.
exp_weighted_avg = pd.Series(attendance).ewm(span=50, adjust=False).mean()

# Plot exponentially weighted moving average.
# ewm(...).mean() returns one value per input sample, so the Question 1 time
# axis can be reused as-is. No figure size and no legend are set, per the
# assignment; only the weighted average is drawn, since a second unlabelled
# curve could not be identified without a legend.
plt.plot(x_axis_list, exp_weighted_avg)
plt.ylabel('Attendance')
plt.xlabel('Time in minutes')
plt.title('Exponentially Weighted Moving Average Chart of the attendance at Atmosfear')
plt.show()

# Close database connection: no further queries are issued after this cell.
conn.close()