# Spaceship Programming Page 1 Analysis


In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
import json
%matplotlib inline

## Loading data

### Load and process data from Firebase

In [2]:
import firebase_admin
from firebase_admin import credentials, firestore

# Service-account key file, looked up relative to the notebook's working directory.
# NOTE(review): make sure this key file is kept out of version control.
cred = credentials.Certificate("spaceship-programming-firebase-adminsdk-wtugk-247f046204.json")

# initialize_app() raises ValueError when the default app already exists, which
# made this cell fail when it was run a second time in the same kernel.
# Reuse the existing default app if there is one.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)


<firebase_admin.App at 0x137162b30>

In [3]:
# Firestore collection to read; also used as the prefix of the exported CSV file name.
collection_name = 'spaceship-logs-2023'

In [8]:
# Stream every document of the collection from Firestore.
datab = firestore.client()
logsref = datab.collection(collection_name)
docs = logsref.stream()

# One dict per Firestore document becomes one DataFrame row.
logs_list = [doc.to_dict() for doc in docs]
df = pd.DataFrame(logs_list)

# df = df.drop("netTangoBlocks", axis=1)
# df = df[df.lessonId == 2637]  # There were a few rows from testing with a different lesson Id
# Keep only the page-1 rows; reset_index() keeps the previous row labels as a column.
df1 = df.loc[df["pageNum"] == 1].reset_index()

#### Calculate trajectory, final velocity, final distance

In [12]:
import json
# 0: top engine - accelerate down
# 1: right engine - accelerate left
# 2: bottom engine - accelerate up
# 3: left engine - accelerate right
# (7, 8), (8, 8), (7, 7), (8, 7) - green patches
# start location: (-9, -9)

def calc_trajectory(engine_schedule, mass):
    """Simulate the ship's motion for one engine schedule, one row per tick.

    Parameters
    ----------
    engine_schedule : str or list
        JSON text (or the already-parsed list) of schedule rows. Each row is
        ``[[start_tick, end_tick], engines]`` where ``engines`` is either an
        empty list (a wait block) or four values indexed as
        0: top engine (accelerates down), 1: right engine (accelerates left),
        2: bottom engine (accelerates up), 3: left engine (accelerates right).
    mass : number
        Divisor applied to the net engine value to obtain acceleration.

    Returns
    -------
    pandas.DataFrame
        Columns ``tick, ax, ay, a, color, vx, vy, xcor, ycor``. Tick 0 is the
        ship at rest at (-9, -9). ``color`` is ``'orange'`` on ticks with
        non-zero acceleration and ``'blue'`` otherwise. If the ship is still
        moving when the schedule ends, rows are appended at constant velocity
        until either coordinate reaches an absolute value of 16.
    """
    # Accept both the raw JSON text stored in the logs and a parsed schedule.
    if isinstance(engine_schedule, (str, bytes, bytearray)):
        engine_schedule = json.loads(engine_schedule)

    initial_xcor = -9
    initial_ycor = -9

    # Collect one constant-acceleration segment per schedule row and join them
    # once at the end, instead of re-allocating the arrays with np.append on
    # every row (which is quadratic in the number of ticks).
    ax_segments = [np.array([0])]
    ay_segments = [np.array([0])]
    for row in engine_schedule:
        duration = row[0][1] - row[0][0]
        if row[1] == []:  # it was a wait block
            ax = 0.0
            ay = 0.0
        else:
            ax = (row[1][3] - row[1][1]) / mass
            ay = (row[1][2] - row[1][0]) / mass
        ax_segments.append(np.array([ax] * duration))
        ay_segments.append(np.array([ay] * duration))
    axs = np.concatenate(ax_segments)
    ays = np.concatenate(ay_segments)

    # Velocity is the running sum of acceleration, position the running sum of velocity.
    vxs = axs.cumsum()
    vys = ays.cumsum()
    xcors = initial_xcor + vxs.cumsum()
    ycors = initial_ycor + vys.cumsum()

    # Rounding removes floating-point residue so a ship that came to rest
    # compares equal to zero.
    vxf = np.round(vxs[-1], 10)
    vyf = np.round(vys[-1], 10)
    if vxf != 0 or vyf != 0:
        # Coast at the final velocity until the ship leaves the |coord| < 16 area.
        # Positions are accumulated in plain lists and appended once.
        x = xcors[-1]
        y = ycors[-1]
        coast_x = []
        coast_y = []
        while abs(x) < 16 and abs(y) < 16:
            x = x + vxf
            y = y + vyf
            coast_x.append(x)
            coast_y.append(y)
        coast_ticks = len(coast_x)
        if coast_ticks:
            axs = np.append(axs, np.zeros(coast_ticks))
            ays = np.append(ays, np.zeros(coast_ticks))
            vxs = np.append(vxs, np.full(coast_ticks, vxf))
            vys = np.append(vys, np.full(coast_ticks, vyf))
            xcors = np.append(xcors, coast_x)
            ycors = np.append(ycors, coast_y)

    accel = np.sqrt(np.square(axs) + np.square(ays))
    tdf = pd.DataFrame({
        'tick': range(len(axs)),
        'ax': axs,
        'ay': ays,
        'a': accel,
        'color': np.where(accel > 0, 'orange', 'blue'),
        'vx': vxs,
        'vy': vys,
        'xcor': xcors,
        'ycor': ycors,
    })
    return tdf


def get_mass():
    """Return the ship mass used when recomputing trajectories (always 10)."""
    # The first time we did this, there was a default mass of 30 but students
    # could change it; that version was:
    #     return 30 if np.isnan(m) else m
    fixed_mass = 10
    return fixed_mass

def get_trajectory(row):
    """Compute the trajectory for one log row and return it as a dict.

    Prints the row label for every 100th row as a progress indicator. The
    result is the trajectory DataFrame from calc_trajectory converted with
    ``.to_dict()`` (column name -> {tick label -> value}).
    """
    row_label = row.name
    if row_label % 100 == 0:
        print(row_label)
    trajectory_df = calc_trajectory(row['engineSchedule'], get_mass())
    return trajectory_df.to_dict()


In [13]:
# Simulate every logged attempt row by row; each cell of the new column holds
# the dict returned by get_trajectory. The printed numbers are row labels.
df1['trajectory']= df1.apply(get_trajectory, axis=1)

0
100
200
300
400
500
600
700
800
900


In [14]:
# df1['final_speed'] = 
def final_speed(traj):
    """Return the speed at the last tick of a trajectory, rounded to 10 decimals.

    ``traj`` is a dict whose 'vx' and 'vy' entries map tick labels to velocity
    components; the largest 'vx' key is taken as the last tick.
    """
    last_tick = max(traj['vx'])
    vx_end = traj['vx'][last_tick]
    vy_end = traj['vy'][last_tick]
    speed = np.sqrt(vx_end ** 2 + vy_end ** 2)
    return np.round(speed, 10)


# Speed at the last simulated tick of each attempt.
df1['final_speed'] = df1['trajectory'].apply(final_speed)

In [15]:
def final_distance(traj):
    """Return how far the last trajectory point is from the target square.

    The target spans 6.5..8.5 on both axes. A final point inside it (edges
    included) yields 0; otherwise the result is the straight-line distance
    from the final point to the square's centre (7.5, 7.5).
    """
    x_low, x_high, x_mid = 6.5, 8.5, 7.5
    y_low, y_high, y_mid = 6.5, 8.5, 7.5

    # The largest 'vx' key identifies the last tick.
    last_tick = max(traj['vx'])
    x_end = traj['xcor'][last_tick]
    y_end = traj['ycor'][last_tick]

    inside_target = (x_low <= x_end <= x_high) and (y_low <= y_end <= y_high)
    if inside_target:
        return 0
    return np.sqrt((x_end - x_mid) ** 2 + (y_end - y_mid) ** 2)

# Distance of each attempt's last point from the target (0 when it ends inside the target).
df1['final_distance'] = df1['trajectory'].apply(final_distance)

In [16]:
def convert_trajectory_to_lists(traj):
    """Turn a {column: {tick: value}} trajectory into {column: [values...]}.

    Values keep the order in which they appear in each inner dict.
    """
    return {column: list(per_tick.values()) for column, per_tick in traj.items()}
    
# Replace each trajectory's per-tick dicts with plain lists.
# NOTE: this overwrites the column, so the cell cannot be run twice in a row
# (lists have no .values()), and final_speed / final_distance, which call
# .keys() on the per-tick dicts, must run before it.
df1['trajectory'] = df1['trajectory'].apply(convert_trajectory_to_lists)


In [17]:
# Export the processed page-1 attempts to a CSV named after the collection.
df1.to_csv(f"{collection_name}_page1_data_lsts.csv")

In [None]:
# Ad-hoc inspection: display every row logged for one student.
df1[df1.userId==26807]

### Load Data from Pickle

In [None]:
# Alternative entry point: load a previously saved frame instead of querying
# Firebase. This overwrites any df1 built by the cells above.
# NOTE(review): unpickling can execute arbitrary code - only load a file you created yourself.
# NOTE(review): no visible cell writes 'page1_df'; confirm how it is produced
# and whether its 'trajectory' column holds per-tick dicts or lists.
df1 = pd.read_pickle('page1_df')

## Plotting Procedures

In [None]:
from IPython.display import display, Markdown, Latex

def plot_no_maze(xs, ys, c):
    """Scatter-plot a trajectory on the fixed -16..16 field with the target outlined.

    Parameters
    ----------
    xs, ys : sequences of x and y coordinates, one entry per tick.
    c : colour specification passed to ``plt.scatter`` (one colour per point
        or a single colour).

    Side effects: sets the global ``figure.figsize`` rcParam to (6, 6) and
    shows the figure.
    """
    # The figure size must be set before the first drawing call creates the
    # figure; setting it afterwards only affects later figures.
    plt.rcParams["figure.figsize"] = (6,6)
    plt.scatter(xs, ys, c=c, s=4)
    plt.plot([6.5, 8.5, 8.5, 6.5, 6.5], [6.5, 6.5, 8.5, 8.5, 6.5],color='g') # The target
    plt.xlim(-16, 16)
    plt.ylim(-16, 16)
    plt.show()

def plot_no_maze_from_dict(tdict, attempt=None, userId=None, timestamp=None):
    """Plot one stored trajectory with a title identifying the attempt.

    Parameters
    ----------
    tdict : dict
        Trajectory with 'xcor', 'ycor' and 'color' entries. Each entry may be
        either a {tick: value} dict (as produced by ``DataFrame.to_dict()``)
        or a plain list (as produced by ``convert_trajectory_to_lists``).
    attempt, userId : shown verbatim in the title.
    timestamp : optional timezone-aware timestamp supporting ``tz_convert``;
        when given, its America/Chicago time of day is appended to the title.

    Side effects: sets the global ``figure.figsize`` rcParam to (6, 6) and
    shows the figure.
    """
    def _column(name):
        # Support both the dict-of-dicts and the dict-of-lists layouts.
        values = tdict[name]
        if isinstance(values, dict):
            return list(values.values())
        return list(values)

    # Set the size before the first drawing call so it applies to this figure.
    plt.rcParams["figure.figsize"] = (6,6)
    plt.scatter(_column('xcor'), _column('ycor'), c=_column('color'), s=4)
    plt.plot([6.5, 8.5, 8.5, 6.5, 6.5], [6.5, 6.5, 8.5, 8.5, 6.5],color='g') # The target
    plt.xlim(-16, 16)
    plt.ylim(-16, 16)

    # Build the title incrementally: previously `time` was only assigned when
    # a timestamp was passed, so a call without one raised UnboundLocalError.
    title = f'student {userId} attempt {attempt}'
    if timestamp:
        time = timestamp.tz_convert('America/Chicago').strftime("%H:%M:%S")
        title = f'{title} at {time}'
    plt.title(title)
    plt.show()

def plot_final_speed_and_distance_vs_attempt(sdf):
    """Show two line charts for one student's attempts, in row order.

    The first chart plots ``final_speed`` with the y-axis fixed to 0..1; the
    second plots ``final_distance`` with the y-axis running from 0 to the
    largest distance. Both titles start with the userId of the first row.
    Sets the global ``figure.figsize`` rcParam to (6, 6).
    """
    plt.rcParams["figure.figsize"] = (6,6)
    attempt_numbers = range(len(sdf))

    # Chart 1: final speed per attempt.
    plt.plot(attempt_numbers, sdf['final_speed'])
    student_id = sdf['userId'].iloc[0]
    plt.title(f'{student_id} Final Speed vs Attempt')
    plt.ylim(0, 1)
    plt.show()

    # Chart 2: final distance from the target per attempt.
    distances = sdf['final_distance']
    plt.plot(attempt_numbers, distances)
    plt.ylim(0, distances.max())
    plt.title(f"{student_id} Final Distance From Target vs Attempt")
    plt.show()

## Class-Level Data 

In [None]:
# Number of attempts per student (rows with a non-null timestamp).
attempts_per_user = df1.groupby('userId')['timestamp'].count()
max_attempts = attempts_per_user.max()
# Bin edges every 5 attempts. The stop value is padded by one bin width so the
# last edge is always >= max_attempts; with range(0, max_attempts, 5) the most
# active students fell beyond the last edge and were dropped from the
# histogram, and fewer than two edges were produced when max_attempts <= 5.
attempts_per_user.hist(bins=range(0, int(max_attempts) + 5, 5))
plt.title("Attempts to Solve 1st Page Distribution")
plt.show()

## Individual Level Data

In [None]:
# NOTE(review): `dfs` is only assigned inside the per-student loop in the next
# cell, so this cell raises NameError on a fresh kernel run from top to bottom.
dfs

In [None]:

# Render the report for the listed students; swap the list for
# df1.userId.unique() to render every student.
for uid in [26825]:
    # One student's attempts in chronological order; after reset_index() the
    # row label is the 0-based attempt number.
    dfs = df1.loc[df1['userId'] == uid].sort_values(by='timestamp').reset_index()
    display(Markdown(f'# student {uid}'))
    display(Markdown('### Final Speed and Distance By Attempt'))
    plot_final_speed_and_distance_vs_attempt(dfs)
    display(Markdown(f'### Student {uid} Individual Attempts'))
    for attempt, row in dfs.iterrows():
        plot_no_maze_from_dict(
            row.trajectory,
            attempt=attempt,
            userId=row.userId,
            timestamp=row.timestamp,
        )


# Scratch