# Setup

## Library imports

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import figure
#%matplotlib inline

from IPython.display import Image

import re

import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score, confusion_matrix

from sklearn.model_selection import cross_val_score, KFold


import scipy.stats
from scipy.stats import chi2




# Runs only after every import above has executed, so seeing this message
# in the output means none of them raised.
print("Setup Complete")

Setup Complete


In [2]:
!pip install duckdb
import duckdb

Collecting duckdb
  Downloading duckdb-0.7.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 15.2/15.2 MB 48.3 MB/s eta 0:00:00
Installing collected packages: duckdb
Successfully installed duckdb-0.7.1

In [3]:
# Location of the raw event log on the Kaggle input volume.
df_train_filepath = '/kaggle/input/predict-student-performance-from-game-play/train.csv'

# Number of distinct sessions in which each event type occurs.
# The FROM clause is built from `df_train_filepath` so the path is written in
# exactly one place; the resulting SQL text is the same as if the path were
# typed inline. `GROUP BY 1` groups by the first selected column (event_name).
query = f"""SELECT event_name,
                COUNT(DISTINCT session_id) AS sessions
        FROM '{df_train_filepath}'
        GROUP BY 1"""

In [4]:
# Open an in-memory DuckDB database; the CSV itself is referenced by path
# inside the SQL text held in `query`.
con = duckdb.connect(database=':memory:')
# execute() hands back the connection, so the fetch is chained directly onto it.
results = con.execute(query).fetchall()

FloatProgress(value=0.0, layout=Layout(width='100%'), style=ProgressStyle(bar_color='black'))

In [5]:
# Bare expression: the notebook echoes the rows fetched from DuckDB as the
# cell output.
results

[('person_click', 11779),
 ('object_click', 11779),
 ('checkpoint', 11779),
 ('notebook_click', 10466),
 ('map_hover', 10845),
 ('observation_click', 11779),
 ('notification_click', 11779),
 ('object_hover', 10845),
 ('map_click', 11779),
 ('cutscene_click', 11779),
 ('navigate_click', 11779)]

In [6]:
# Check which container type fetchall() produced.
type(results)

list

In [7]:
# Wrap the fetched rows in a DataFrame. No column names are supplied, so the
# two fields of each row (event name, distinct-session count) receive pandas'
# default integer column labels.
df = pd.DataFrame(data=results)

In [8]:
# Bare expression: render the per-event session counts as a table.
df

Unnamed: 0,0,1
0,person_click,11779
1,object_click,11779
2,checkpoint,11779
3,notebook_click,10466
4,map_hover,10845
5,observation_click,11779
6,notification_click,11779
7,object_hover,10845
8,map_click,11779
9,cutscene_click,11779


In [9]:
# NOTE(review): this repeats the path already assigned in the DuckDB query
# cell; a single shared constant would remove the duplication.
df_train_filepath = '/kaggle/input/predict-student-performance-from-game-play/train.csv'
# Read the whole event log into memory using pandas' default parsing options.
df_train = pd.read_csv(filepath_or_buffer=df_train_filepath)
# Bare expression: show the frame as the cell output.
df_train

Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
0,20090312431273200,0,0,cutscene_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,undefined,intro,tunic.historicalsociety.closet,tunic.historicalsociety.closet.intro,,,,0-4
1,20090312431273200,1,1323,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,"Whatcha doing over there, Jo?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
2,20090312431273200,2,831,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,Just talking to Teddy.,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
3,20090312431273200,3,1147,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,I gotta run to my meeting!,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
4,20090312431273200,4,1863,person_click,basic,0,,-412.991405,-159.314686,381.0,494.0,,"Can I come, Gramps?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13174206,22100221145014656,1600,5483231,navigate_click,undefined,22,,343.887291,36.701026,483.0,273.0,,,,tunic.capitol_2.hall,,,,,13-22
13174207,22100221145014656,1601,5485166,navigate_click,undefined,22,,332.696070,141.493178,545.0,221.0,,,chap4_finale_c,tunic.capitol_2.hall,,,,,13-22
13174208,22100221145014656,1602,5485917,navigate_click,undefined,22,,369.912859,140.569205,611.0,217.0,,,,tunic.capitol_2.hall,,,,,13-22
13174209,22100221145014656,1603,5486753,navigate_click,undefined,22,,252.299653,123.805889,526.0,232.0,,,chap4_finale_c,tunic.capitol_2.hall,,,,,13-22


In [10]:
# Per-question labels: each row pairs a "<session>_q<N>" identifier with a
# `correct` flag.
df_train_labels_filepath = '/kaggle/input/predict-student-performance-from-game-play/train_labels.csv'
# Read the label file into memory using pandas' default parsing options.
df_train_labels = pd.read_csv(filepath_or_buffer=df_train_labels_filepath)
# Bare expression: show the frame as the cell output.
df_train_labels

Unnamed: 0,session_id,correct
0,20090312431273200_q1,1
1,20090312433251036_q1,0
2,20090314121766812_q1,1
3,20090314363702160_q1,1
4,20090314441803444_q1,1
...,...,...
212017,22100215342220508_q18,1
212018,22100215460321130_q18,1
212019,22100217104993650_q18,1
212020,22100219442786200_q18,1


# Exploratory Data Analysis

In [11]:
# All logged events belonging to one example session, selected with a
# boolean mask on session_id.
df_train[df_train['session_id'].eq(20090312431273200)]

Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
0,20090312431273200,0,0,cutscene_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,undefined,intro,tunic.historicalsociety.closet,tunic.historicalsociety.closet.intro,,,,0-4
1,20090312431273200,1,1323,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,"Whatcha doing over there, Jo?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
2,20090312431273200,2,831,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,Just talking to Teddy.,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
3,20090312431273200,3,1147,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,I gotta run to my meeting!,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
4,20090312431273200,4,1863,person_click,basic,0,,-412.991405,-159.314686,381.0,494.0,,"Can I come, Gramps?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,,,,0-4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
876,20090312431273200,927,1267357,navigate_click,undefined,22,,927.307255,-10.355929,838.0,335.0,,,tomap,tunic.historicalsociety.entry,,,,,13-22
877,20090312431273200,928,1268292,map_hover,basic,22,,,,,,366.0,,tomap,tunic.historicalsociety.entry,,,,,13-22
878,20090312431273200,929,1269474,map_click,undefined,22,,457.523005,22.141338,443.0,316.0,,,tunic.capitol_2,tunic.historicalsociety.entry,,,,,13-22
879,20090312431273200,930,1270708,navigate_click,undefined,22,,224.190321,-60.268671,404.0,337.0,,,chap4_finale_c,tunic.capitol_2.hall,,,,,13-22


## Unique values

In [12]:
# Distinct event types present in the log, in order of first appearance.
pd.unique(df_train['event_name'])

array(['cutscene_click', 'person_click', 'navigate_click',
       'observation_click', 'notification_click', 'object_click',
       'object_hover', 'map_hover', 'map_click', 'checkpoint',
       'notebook_click'], dtype=object)