## GamePlay prediction

### Goal: To predict student performance during game-based learning in real-time

- session_id - the ID of the session the event took place in
- index - the index of the event for the session
- elapsed_time - how much time has passed (in milliseconds) between the start of the session and when the event was recorded
- event_name - the name of the event type
- name - the event name (e.g. identifies whether a notebook_click is opening or closing the notebook)
- level - what level of the game the event occurred in (0 to 22)
- page - the page number of the event (only for notebook-related events)
- room_coor_x - the coordinates of the click in reference to the in-game room (only for click events)
- room_coor_y - the coordinates of the click in reference to the in-game room (only for click events)
- screen_coor_x - the coordinates of the click in reference to the player’s screen (only for click events)
- screen_coor_y - the coordinates of the click in reference to the player’s screen (only for click events)
- hover_duration - how long (in milliseconds) the hover happened for (only for hover events)
- text - the text the player sees during this event
- fqid - the fully qualified ID of the event
- room_fqid - the fully qualified ID of the room the event took place in
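- text_fqid - the fully qualified ID of the text (presumably the text shown in the `text` column; the original data description is truncated here)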
- fullscreen - whether the player is in fullscreen mode
- hq - whether the game is in high-quality
- music - whether the game music is on or off
- level_group - which group of levels - and group of questions - this row belongs to (0-4, 5-12, 13-22)


In [7]:
#reading the libraries
import pandas as pd
import numpy as np

In [68]:
#reading the datasets
train_data=pd.read_csv('data/train.csv')
# The labels are loaded here as well because this cell displays train_data_y;
# without this line the cell raises NameError on a fresh kernel, since the
# only other assignment of train_data_y lives in a later cell.
train_data_y=pd.read_csv('data/train_labels.csv')

train_data_y

Unnamed: 0,session_id,correct
0,20090312431273200_q1,1
1,20090312433251036_q1,0
2,20090312455206810_q1,1
3,20090313091715820_q1,0
4,20090313571836404_q1,1
...,...,...
424111,22100215342220508_q18,1
424112,22100215460321130_q18,1
424113,22100217104993650_q18,1
424114,22100219442786200_q18,1


In [80]:
# Load the label file: one row per '<session_id>_<question>' key with its `correct` flag.
train_data_y = pd.read_csv('data/train_labels.csv')

In [81]:
# Label keys look like '20090312431273200_q1': '<session_id>_<question>'.
# Split once, and only rewrite the columns while the suffix is still present,
# so re-running this cell does not overwrite question_number with NaN.
# Note: session_id stays a string here, whereas train_data['session_id'] is int64.
session_parts=train_data_y['session_id'].str.split('_',expand=True)
if session_parts.shape[1]>1:
    train_data_y['question_number']=session_parts[1]
    train_data_y['session_id']=session_parts[0]

In [84]:
# Distinct question identifiers, in order of first appearance.
pd.unique(train_data_y['question_number'])

array(['q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10', 'q11',
       'q12', 'q13', 'q14', 'q15', 'q16', 'q17', 'q18'], dtype=object)

In [91]:
# Distinct values of the `name` column, in order of first appearance.
pd.unique(train_data['name'])

array(['basic', 'undefined', 'close', 'open', 'prev', 'next'],
      dtype=object)

In [97]:
# All map_hover events.
train_data.loc[train_data['event_name'].eq('map_hover')]

Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
127,20090312431273200,127,135124,map_hover,basic,3,,,,,,234.0,,tunic.historicalsociety,tunic.historicalsociety.entry,,0,0,1,0-4
128,20090312431273200,128,135256,map_hover,basic,3,,,,,,17.0,,tunic.kohlcenter,tunic.historicalsociety.entry,,0,0,1,0-4
160,20090312431273200,160,161405,map_hover,basic,4,,,,,,250.0,,toentry,tunic.kohlcenter.halloffame,,0,0,1,0-4
161,20090312431273200,161,161822,map_hover,basic,4,,,,,,17.0,,tunic.kohlcenter,tunic.kohlcenter.halloffame,,0,0,1,0-4
172,20090312431273200,182,226643,map_hover,basic,5,,,,,,750.0,,toentry,tunic.capitol_0.hall,,0,0,1,5-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26296860,22100221145014656,1519,5341715,map_hover,basic,21,,,,,,418.0,,tunic.capitol_2,tunic.wildlife.center,,0,0,1,13-22
26296888,22100221145014656,1547,5375509,map_hover,basic,21,,,,,,568.0,,tobasement,tunic.historicalsociety.entry,,0,0,1,13-22
26296889,22100221145014656,1548,5375959,map_hover,basic,21,,,,,,317.0,,tunic.capitol_2,tunic.historicalsociety.entry,,0,0,1,13-22
26296938,22100221145014656,1597,5482022,map_hover,basic,22,,,,,,1200.0,,tomap,tunic.historicalsociety.entry,,0,0,1,13-22


In [98]:
# Per-column null counts restricted to map_hover events.
train_data.loc[train_data['event_name'].eq('map_hover')].isna().sum()

session_id             0
index                  0
elapsed_time           0
event_name             0
name                   0
level                  0
page              945159
room_coor_x       945159
room_coor_y       945159
screen_coor_x     945159
screen_coor_y     945159
hover_duration         0
text              945159
fqid                   0
room_fqid              0
text_fqid         945159
fullscreen             0
hq                     0
music                  0
level_group            0
dtype: int64

In [95]:
# Events of one example session while it is on level 1.
train_data.loc[train_data['session_id'].eq(20090312431273200) & train_data['level'].eq(1)]

Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
28,20090312431273200,28,28113,navigate_click,undefined,1,,-587.657879,-27.916913,441.0,303.0,,,retirement_letter,tunic.historicalsociety.closet,,0,0,1,0-4
29,20090312431273200,29,32229,notification_click,basic,1,,-182.558163,-1.906501,767.0,305.0,,Gramps is in trouble for losing papers?,,tunic.historicalsociety.closet,tunic.historicalsociety.closet.retirement_lett...,0,0,1,0-4
30,20090312431273200,30,33063,notification_click,basic,1,,-182.500704,-55.888296,767.0,359.0,,This can't be right!,,tunic.historicalsociety.closet,tunic.historicalsociety.closet.retirement_lett...,0,0,1,0-4
31,20090312431273200,31,34245,notification_click,basic,1,,-182.486523,-55.883804,767.0,359.0,,Gramps is a great historian!,,tunic.historicalsociety.closet,tunic.historicalsociety.closet.retirement_lett...,0,0,1,0-4
32,20090312431273200,32,36433,object_click,close,1,,-113.484832,241.116732,836.0,62.0,,,retirement_letter,tunic.historicalsociety.closet,,0,0,1,0-4
33,20090312431273200,33,36346,navigate_click,undefined,1,,-453.48472,-193.883232,496.0,497.0,,,,tunic.historicalsociety.closet,,0,0,1,0-4
34,20090312431273200,34,36896,navigate_click,undefined,1,,-381.244988,-22.454632,525.0,316.0,,,,tunic.historicalsociety.closet,,0,0,1,0-4
35,20090312431273200,35,37445,navigate_click,undefined,1,,-358.832033,54.847822,462.0,249.0,,,tobasement,tunic.historicalsociety.closet,,0,0,1,0-4
36,20090312431273200,36,39845,navigate_click,undefined,1,,-234.247817,-77.052212,245.0,376.0,,,janitor,tunic.historicalsociety.basement,,0,0,1,0-4
37,20090312431273200,37,41297,observation_click,basic,1,,-400.311667,-117.476454,179.0,405.0,,Hmm. Button's still not working.,janitor,tunic.historicalsociety.basement,tunic.historicalsociety.basement.janitor,0,0,1,0-4


In [88]:
# dtype of the session key in the event log (compare with the string key in the labels).
train_data.dtypes['session_id']

dtype('int64')

In [86]:
# Labels of one example session; session_id in the labels is a string, hence the quoted literal.
train_data_y.loc[train_data_y['session_id'].eq('20090312431273200')]

Unnamed: 0,session_id,correct,question_number
0,20090312431273200,1,q1
23562,20090312431273200,1,q2
47124,20090312431273200,1,q3
70686,20090312431273200,1,q4
94248,20090312431273200,1,q5
117810,20090312431273200,1,q6
141372,20090312431273200,1,q7
164934,20090312431273200,1,q8
188496,20090312431273200,1,q9
212058,20090312431273200,1,q10


In [78]:
# DataFrame has no .unique(); count distinct sessions on the session_id column.
# This equals the number of sessions only after the '_q<N>' suffix has been
# split off session_id (done in an earlier cell).
train_data_y['session_id'].nunique()

23562

In [79]:
# Number of distinct sessions in the event log.
train_data['session_id'].nunique()

23562

In [69]:
# (rows, columns) of the event log, then of the labels, one per line.
print(train_data.shape, train_data_y.shape, sep='\n')

(26296946, 20)
(424116, 2)


In [71]:
# Peek at the first 50 events.
train_data.iloc[:50]

Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
0,20090312431273200,0,0,cutscene_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,undefined,intro,tunic.historicalsociety.closet,tunic.historicalsociety.closet.intro,0,0,1,0-4
1,20090312431273200,1,1323,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,"Whatcha doing over there, Jo?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
2,20090312431273200,2,831,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,Just talking to Teddy.,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
3,20090312431273200,3,1147,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,I gotta run to my meeting!,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
4,20090312431273200,4,1863,person_click,basic,0,,-412.991405,-159.314686,381.0,494.0,,"Can I come, Gramps?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
5,20090312431273200,5,3423,person_click,basic,0,,-412.991405,-157.314686,381.0,492.0,,"Sure thing, Jo. Grab your notebook and come up...",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
6,20090312431273200,6,5197,person_click,basic,0,,478.485079,-199.971679,593.0,485.0,,"See you later, Teddy.",teddy,tunic.historicalsociety.closet,tunic.historicalsociety.closet.teddy.intro_0_cs_0,0,0,1,0-4
7,20090312431273200,7,6180,person_click,basic,0,,503.355128,-168.619913,609.0,453.0,,I get to go to Gramps's meeting!,teddy,tunic.historicalsociety.closet,tunic.historicalsociety.closet.teddy.intro_0_cs_0,0,0,1,0-4
8,20090312431273200,8,7014,person_click,basic,0,,510.733442,-157.720642,615.0,442.0,,Now where did I put my notebook?,teddy,tunic.historicalsociety.closet,tunic.historicalsociety.closet.teddy.intro_0_cs_0,0,0,1,0-4
9,20090312431273200,9,7946,person_click,basic,0,,512.048005,-153.743631,616.0,438.0,,\u00f0\u0178\u02dc\u00b4,teddy,tunic.historicalsociety.closet,tunic.historicalsociety.closet.teddy.intro_0_cs_0,0,0,1,0-4


In [57]:
#checking the value counts for event_name
# `data_frame` is never defined in this notebook; the event log is `train_data`.
train_data['event_name'].value_counts()

navigate_click        11326433
person_click           6052853
cutscene_click         2703035
object_click           2198211
object_hover           1057085
map_hover               945159
notification_click      649001
notebook_click          564544
map_click               517242
observation_click       212355
checkpoint               71028
Name: event_name, dtype: int64

In [60]:
# Per-column null counts over the whole event log
# (`data_frame` is never defined in this notebook; use `train_data`).
train_data.isnull().sum()

session_id               0
index                    0
elapsed_time             0
event_name               0
name                     0
level                    0
page              25732402
room_coor_x        2073272
room_coor_y        2073272
screen_coor_x      2073272
screen_coor_y      2073272
hover_duration    24294702
text              16679807
fqid               8274415
room_fqid                0
text_fqid         16679702
fullscreen               0
hq                       0
music                    0
level_group              0
dtype: int64

In [62]:
# Null counts for every event type except map_hover and checkpoint
# (`data_frame` is never defined in this notebook; use `train_data`).
train_data[~train_data['event_name'].isin(['map_hover','checkpoint'])].isnull().sum()

session_id               0
index                    0
elapsed_time             0
event_name               0
name                     0
level                    0
page              24716215
room_coor_x        1057085
room_coor_y        1057085
screen_coor_x      1057085
screen_coor_y      1057085
hover_duration    24223674
text              15663620
fqid               8274415
room_fqid                0
text_fqid         15663515
fullscreen               0
hq                       0
music                    0
level_group              0
dtype: int64

In [65]:
# Number of map_hover events. Summing the boolean mask avoids materialising
# the filtered frame; `data_frame` is never defined here, so use `train_data`.
int((train_data['event_name']=='map_hover').sum())

945159

In [64]:
# Per-column null counts for map_hover events
# (`data_frame` is never defined in this notebook; use `train_data`).
train_data[train_data['event_name']=='map_hover'].isnull().sum()

session_id             0
index                  0
elapsed_time           0
event_name             0
name                   0
level                  0
page              945159
room_coor_x       945159
room_coor_y       945159
screen_coor_x     945159
screen_coor_y     945159
hover_duration         0
text              945159
fqid                   0
room_fqid              0
text_fqid         945159
fullscreen             0
hq                     0
music                  0
level_group            0
dtype: int64

In [58]:
# Per-column null counts for notebook_click events
# (`data_frame` is never defined in this notebook; use `train_data`).
train_data[train_data['event_name']=='notebook_click'].isnull().sum()

session_id             0
index                  0
elapsed_time           0
event_name             0
name                   0
level                  0
page                   0
room_coor_x            0
room_coor_y            0
screen_coor_x          0
screen_coor_y          0
hover_duration    564544
text              564544
fqid              564544
room_fqid              0
text_fqid         564544
fullscreen             0
hq                     0
music                  0
level_group            0
dtype: int64

In [19]:
def null_percentages(data_frame):
    """Print, for every column of ``data_frame``, the percentage of null values.

    One line is printed per column, in column order. Nothing is returned.
    """
    # The row count is the same for every column, so compute it once.
    total_rows=len(data_frame)
    for column in data_frame.columns:
        missing=data_frame[column].isnull().sum()
        percentage=(missing/total_rows)*100
        print('Column Name: ',column,' Null value percentage: ',percentage)

In [20]:
# Report null percentages for the event log
# (`data_frame` is never defined at notebook level; the loaded frame is `train_data`).
null_percentages(train_data)

Column Name:  session_id  Null value percentage:  0.0
Column Name:  index  Null value percentage:  0.0
Column Name:  elapsed_time  Null value percentage:  0.0
Column Name:  event_name  Null value percentage:  0.0
Column Name:  name  Null value percentage:  0.0
Column Name:  level  Null value percentage:  0.0
Column Name:  page  Null value percentage:  97.85319557639887
Column Name:  room_coor_x  Null value percentage:  7.884079010543658
Column Name:  room_coor_y  Null value percentage:  7.884079010543658
Column Name:  screen_coor_x  Null value percentage:  7.884079010543658
Column Name:  screen_coor_y  Null value percentage:  7.884079010543658
Column Name:  hover_duration  Null value percentage:  92.38602079496228
Column Name:  text  Null value percentage:  63.428684836634645
Column Name:  fqid  Null value percentage:  31.465307796578358
Column Name:  room_fqid  Null value percentage:  0.0
Column Name:  text_fqid  Null value percentage:  63.42828555072517
Column Name:  fullscreen  Null

In [21]:
#Remove page column

## Cleaning the data

In [27]:
# Rows whose level is present. `notnull()` replaces the `isnull()==False`
# comparison; `data_frame` is never defined here, so use `train_data`.
train_data[train_data['level'].notnull()]

Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
0,20090312431273200,0,0,cutscene_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,undefined,intro,tunic.historicalsociety.closet,tunic.historicalsociety.closet.intro,0,0,1,0-4
1,20090312431273200,1,1323,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,"Whatcha doing over there, Jo?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
2,20090312431273200,2,831,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,Just talking to Teddy.,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
3,20090312431273200,3,1147,person_click,basic,0,,-413.991405,-159.314686,380.0,494.0,,I gotta run to my meeting!,gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
4,20090312431273200,4,1863,person_click,basic,0,,-412.991405,-159.314686,381.0,494.0,,"Can I come, Gramps?",gramps,tunic.historicalsociety.closet,tunic.historicalsociety.closet.gramps.intro_0_...,0,0,1,0-4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26296941,22100221145014656,1600,5483231,navigate_click,undefined,22,,343.887291,36.701026,483.0,273.0,,,,tunic.capitol_2.hall,,0,0,1,13-22
26296942,22100221145014656,1601,5485166,navigate_click,undefined,22,,332.696070,141.493178,545.0,221.0,,,chap4_finale_c,tunic.capitol_2.hall,,0,0,1,13-22
26296943,22100221145014656,1602,5485917,navigate_click,undefined,22,,369.912859,140.569205,611.0,217.0,,,,tunic.capitol_2.hall,,0,0,1,13-22
26296944,22100221145014656,1603,5486753,navigate_click,undefined,22,,252.299653,123.805889,526.0,232.0,,,chap4_finale_c,tunic.capitol_2.hall,,0,0,1,13-22


In [33]:
# How often each page value occurs within each level
# (`data_frame` is never defined in this notebook; use `train_data`).
train_data.groupby('level')['page'].value_counts()

level  page
1      0.0     20199
2      0.0     19766
3      0.0     24244
4      1.0     16287
       0.0      1237
               ...  
22     4.0       175
       0.0       122
       3.0       122
       2.0       107
       1.0        96
Name: page, Length: 86, dtype: int64

0.0    19766
Name: page, dtype: int64

In [47]:
# For every level (ascending), list the page values observed on that level,
# most frequent first. `data_frame` is never defined in this notebook, so the
# loop reads from `train_data`; `.loc` replaces the chained indexing.
for level_val in train_data['level'].value_counts().index.sort_values():
    print('"Level value": ',level_val)
    for page_val in train_data.loc[train_data['level']==level_val,'page'].value_counts().index:
        print('Page value: ',page_val)
    print('----------------------------------------------------------')

"Level value":  0
----------------------------------------------------------
"Level value":  1
Page value:  0.0
----------------------------------------------------------
"Level value":  2
Page value:  0.0
----------------------------------------------------------
"Level value":  3
Page value:  0.0
----------------------------------------------------------
"Level value":  4
Page value:  1.0
Page value:  0.0
----------------------------------------------------------
"Level value":  5
Page value:  1.0
Page value:  0.0
----------------------------------------------------------
"Level value":  6
Page value:  1.0
Page value:  0.0
----------------------------------------------------------
"Level value":  7
Page value:  1.0
Page value:  0.0
----------------------------------------------------------
"Level value":  8
Page value:  2.0
Page value:  1.0
Page value:  0.0
----------------------------------------------------------
"Level value":  9
Page value:  2.0
Page value:  0.0
Page value:  1.0


In [56]:
# Events from levels 4 and 5. Despite its name, l4_pg1 is not filtered on page.
# `data_frame` is never defined in this notebook; use `train_data`.
l4_pg1=train_data[train_data['level'].isin([4,5])]
# Select numeric/bool columns explicitly: recent pandas versions no longer drop
# string columns silently in DataFrame.corr() and raise instead.
l4_pg1.select_dtypes(include=['number','bool']).corr()

Unnamed: 0,session_id,index,elapsed_time,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,fullscreen,hq,music
session_id,1.0,-0.092591,0.009057,-0.010228,0.002885,-0.003551,0.000856,-0.00067,0.01122,0.000538,0.035793,-0.010257,0.052114
index,-0.092591,1.0,0.084771,0.109475,0.012515,0.036802,-0.012596,0.065406,-0.000679,-0.000416,-0.018108,0.000596,0.024968
elapsed_time,0.009057,0.084771,1.0,0.007058,0.006565,0.001515,0.000783,0.002875,-0.002526,-0.00015,0.005103,0.003965,-0.002224
level,-0.010228,0.109475,0.007058,1.0,0.049085,0.384088,-0.205211,0.599138,0.077866,0.01501,-0.005515,-0.004216,0.002306
page,0.002885,0.012515,0.006565,0.049085,1.0,-0.187096,0.01568,-0.24787,-0.017946,,-0.027465,-0.011217,-0.015252
room_coor_x,-0.003551,0.036802,0.001515,0.384088,-0.187096,1.0,-0.074147,0.796879,-0.055438,,-0.005931,-0.00562,0.004601
room_coor_y,0.000856,-0.012596,0.000783,-0.205211,0.01568,-0.074147,1.0,-0.105214,-0.929363,,0.000623,-0.001241,-0.002773
screen_coor_x,-0.00067,0.065406,0.002875,0.599138,-0.24787,0.796879,-0.105214,1.0,-0.02971,,0.011804,-2.9e-05,0.005931
screen_coor_y,0.01122,-0.000679,-0.002526,0.077866,-0.017946,-0.055438,-0.929363,-0.02971,1.0,,0.047298,0.016107,0.005429
hover_duration,0.000538,-0.000416,-0.00015,0.01501,,,,,,1.0,-0.002823,-0.001513,0.000166


In [48]:
# Correlations among numeric columns for level-4 events on notebook page 1.
# `data_frame` is never defined in this notebook; use `train_data`. Numeric/bool
# columns are selected explicitly because recent pandas versions raise in
# DataFrame.corr() when string columns are present.
train_data[(train_data['level']==4)&(train_data['page']==1.0)].select_dtypes(include=['number','bool']).corr()

Unnamed: 0,session_id,index,elapsed_time,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,fullscreen,hq,music
session_id,1.0,-0.075517,0.024882,,,-0.009144,0.00498,-0.003489,0.000217,,0.053487,-0.020887,0.073594
index,-0.075517,1.0,0.203654,,,-0.038839,0.010181,-0.000691,-0.009056,,-0.015682,0.005638,0.006728
elapsed_time,0.024882,0.203654,1.0,,,-0.007074,-0.000395,0.001696,-0.000582,,-0.00933,-0.011447,-0.003272
level,,,,,,,,,,,,,
page,,,,,,,,,,,,,
room_coor_x,-0.009144,-0.038839,-0.007074,,,1.0,0.719845,0.829605,-0.715754,,-0.01124,-0.010126,-0.01332
room_coor_y,0.00498,0.010181,-0.000395,,,0.719845,1.0,0.869718,-0.993665,,-0.011423,0.004385,-0.006653
screen_coor_x,-0.003489,-0.000691,0.001696,,,0.829605,0.869718,1.0,-0.863301,,-0.002718,-0.00503,-0.004156
screen_coor_y,0.000217,-0.009056,-0.000582,,,-0.715754,-0.993665,-0.863301,1.0,,0.027223,-0.002229,0.00777
hover_duration,,,,,,,,,,,,,


In [25]:
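#page and level are related: the per-level listing above shows more page values at higher levels, although their linear correlation for levels 4-5 is weak (about 0.05)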

In [67]:
#getting the data with the levels 4,12 and 22.
# `data_frame` is never defined in this notebook; use `train_data`.
train_data[train_data['level'].isin([4,12,22])]

Unnamed: 0,session_id,index,elapsed_time,event_name,name,level,page,room_coor_x,room_coor_y,screen_coor_x,screen_coor_y,hover_duration,text,fqid,room_fqid,text_fqid,fullscreen,hq,music,level_group
152,20090312431273200,152,156239,navigate_click,undefined,4,,337.732330,-51.000000,348.0,381.0,,,,tunic.kohlcenter.halloffame,,0,0,1,0-4
153,20090312431273200,153,156838,navigate_click,undefined,4,,30.196745,-51.000000,95.0,381.0,,,,tunic.kohlcenter.halloffame,,0,0,1,0-4
154,20090312431273200,154,157439,navigate_click,undefined,4,,-157.073154,-65.000000,35.0,395.0,,,,tunic.kohlcenter.halloffame,,0,0,1,0-4
155,20090312431273200,155,157972,navigate_click,undefined,4,,-301.805255,-72.000000,34.0,402.0,,,,tunic.kohlcenter.halloffame,,0,0,1,0-4
156,20090312431273200,156,158683,navigate_click,undefined,4,,-467.567269,-51.000000,33.0,381.0,,,,tunic.kohlcenter.halloffame,,0,0,1,0-4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26296941,22100221145014656,1600,5483231,navigate_click,undefined,22,,343.887291,36.701026,483.0,273.0,,,,tunic.capitol_2.hall,,0,0,1,13-22
26296942,22100221145014656,1601,5485166,navigate_click,undefined,22,,332.696070,141.493178,545.0,221.0,,,chap4_finale_c,tunic.capitol_2.hall,,0,0,1,13-22
26296943,22100221145014656,1602,5485917,navigate_click,undefined,22,,369.912859,140.569205,611.0,217.0,,,,tunic.capitol_2.hall,,0,0,1,13-22
26296944,22100221145014656,1603,5486753,navigate_click,undefined,22,,252.299653,123.805889,526.0,232.0,,,chap4_finale_c,tunic.capitol_2.hall,,0,0,1,13-22
