# Introduction
The project below investigates 2019 Trends in International Math and Science Study (TIMSS) Data for 8th graders.

_Source: [2019 TIMSS Database](https://timss2019.org/international-database/)_

### Questions of Interest
1. How does a student's enviroment at home, in the classroom, and at school affect academic understanding?
2. Are there specific teacher behaviors that lead to improved understanding in specific disciplines?
3. What can students and teachers do to improve student academic understanding?

The project will cover a variety of analyses regarding vast data collected. The project will include:
* prediction of student score based on student attitudes and demographics, school characteristics
* prediction of student scores based on teacher attitudes and practices
* recommendation engine for additional study problems for a given student (or group of students from a school or country)
* recommendation engine for areas of investment or professional development for a given teacher (or group of teachers from a school or country)

In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
import glob
import re
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

# Load Data
Import the 2019 TIMSS Data into six separate dataframes

In [20]:
school = 0
stud_ach = 0
student = 0
stud_teach = 0
math_teach = 0
sci_teach = 0
for file in glob.glob('T19_G8_SPSS Data/*.sav'):
    if re.search('bcg',file) != None:
        school_cols = ['IDCNTRY','IDSCHOOL','IDPOP','IDGRADER','IDGRADE','ITLANG_C','LCID_C','BCBGDAS','BCBGEAS','BCBGMRS','BCBGSRS','BCDGDAS','BCDGEAS','BCDGMRS','BCDGSRS','BCDGSBC','BCDGTIHY']
        try:
            school = school.append(pd.read_spss(file, usecols=school_cols), ignore_index=True, sort=False)
        except:
            school = pd.read_spss(file, usecols=school_cols)
    elif re.search('bsa',file) != None:
        try:
            stud_ach = stud_ach.append(pd.read_spss(file), ignore_index=True, sort=False)
        except:
            stud_ach = pd.read_spss(file)
    elif re.search('bsg',file) != None:
        student_cols = ['IDCNTRY','IDBOOK','IDSCHOOL','IDCLASS','IDSTUD','BSBG01','BSBG03','BSBG04','BSBG05A','BSBG05B','BSBG05C','BSBG05D','BSBG05E','BSBG05F','BSBG05G','BSBG05H','BSBG05I','BSBG06A','BSBG06B','BSBG07','BSBG08A','BSBG08B','BSBG09A','BSBG09B','BSBG10','BSBG11A','BSBG11B','BSBG12A','BSBG12B','BSBG12C','BSBG12D','BSBG12E','BSBG12F','IDPOP','IDGRADE','ITLANG_SQ','LCID_SQ','ITLANG_SA','LCID_SA','ITSEX','BSDAGE','BSBGHER','BSDGHER','BSBGSSB','BSDGSSB','BSBGSB','BSDGSB','BSBGSLM','BSDGSLM','BSBGICM','BSDGICM','BSBGDML','BSDGDML','BSBGSCM','BSDGSCM','BSBGSVM','BSDGSVM','BSBGSLS','BSDGSLS','BSBGICS','BSDGICS','BSBGSCS','BSDGSCS','BSBGSVS','BSDGSVS','BSBGSLB','BSDGSLB','BSBGICB','BSDGICB','BSBGSCB','BSDGSCB','BSBGSLE','BSDGSLE','BSBGICE','BSDGICE','BSBGSCE','BSDGSCE','BSBGSLC','BSDGSLC','BSBGICC','BSDGICC','BSBGSCC','BSDGSCC','BSBGSLP','BSDGSLP','BSBGICP','BSDGICP','BSBGSCP','BSDGSCP','BSBGSEC','BSDGSEC', 'BSDG05S','BSDGEDUP']
        try:
            student = student.append(pd.read_spss(file, usecols=student_cols), ignore_index=True, sort=False)
        except:
            student = pd.read_spss(file, usecols=student_cols)
    elif re.search('bst',file) != None:
        stud_teach_cols = ['IDCNTRY', 'IDBOOK', 'IDSCHOOL', 'IDCLASS', 'IDSTUD', 'IDTEALIN', 'IDTEACH','IDLINK', 'IDPOP', 'IDGRADE', 'IDSUBJ', 'ITCOURSE', 'NMTEACH','NSTEACH']
        try:
            stud_teach = stud_teach.append(pd.read_spss(file, usecols=stud_teach_cols), ignore_index=True, sort=False)
        except:
            stud_teach = pd.read_spss(file, usecols=stud_teach_cols)
    elif re.search('btm',file) != None:
        try:
            math_teach = math_teach.append(pd.read_spss(file), ignore_index=True, sort=False)
        except:
            math_teach = pd.read_spss(file)
    elif re.search('bts',file) != None:
        try:
            sci_teach = sci_teach.append(pd.read_spss(file), ignore_index=True, sort=False)
        except:
            sci_teach = pd.read_spss(file)

In [None]:
stud_ach_drop_cols = ['VERSION','SCOPE']
math_teach_drop_cols = ['BTBM18*','BTBM22*','VERSION','SCOPE']
sci_teach_drop_cols = ['BTBS17*','BTBS21*','VERSION','SCOPE']

In [249]:
file_name = 'T19_G8_Codebooks/T19_G8_Codebook.xlsx'
dfs = pd.read_excel(file_name, sheet_name=None)

In [251]:
for df in dfs:
    dfs[df].set_index('Variable', inplace=True)

In [252]:
dfs['BSAM7'].iloc[:,:1]

Unnamed: 0_level_0,Label
Variable,Unnamed: 1_level_1
IDCNTRY,Country ID - Numeric ISO Code
IDBOOK,Student Test Booklet
IDSCHOOL,School ID
IDCLASS,Class ID
IDSTUD,Student ID
...,...
SE72323_S,TIME ON SCREEN (SECONDS) CONDITIONS WHERE ROCK...
SE72368_S,TIME ON SCREEN (SECONDS) POSITION OF MOON WHEN...
SE72303_S,TIME ON SCREEN (SECONDS) DIAGRAM BEST REPRESEN...
VERSION,Version


In [149]:
correct_cols = []
for col in list(stud_ach.iloc[:1,:].columns):
    try:
        if "*" in str(stud_ach.iloc[:1,:].loc[:,col]) or ("INCORRECT" not in str(stud_ach.iloc[:1,:].loc[:,col]) and "CORRECT" in str(stud_ach.iloc[:1,:].loc[:,col])):
            correct_cols.append(col)
    except:
        pass
stud_ach.iloc[:1,:].loc[:,correct_cols]

  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())
  for val, m in zip(values.ravel(), mask.ravel())


Unnamed: 0,ME62271,ME62152,ME62230,ME62076,ME62030,ME62171,ME62194,ME62320,ME72001,ME72019,ME72189,ME72024,ME72043,ME72221,ME72220,ME72211A,SE62119,SE62006A,SE62006B,SE62006C,SE62006,SE62247,SE62037,SE72460,SE72000A,SE72038,SE72120,SE72143A,SE72143B,SE72143C,SE72143D,SE72143,SE72523,SE72168,SE72370
0,D*,CORRECT RESPONSE,B*,A*,CORRECT RESPONSE,D*,C*,D*,CORRECT RESPONSE,CORRECT RESPONSE,B*,CORRECT RESPONSE,C*,B*,D*,B*,C*,CORRECT RESPONSE,CORRECT RESPONSE,CORRECT RESPONSE,CORRECT RESPONSE,B*,A*,A*,E*,A*,B*,A*,C*,D*,B*,CORRECT RESPONSE,CORRECT RESPONSE,D*,C*


In [135]:
(~stud_ach.iloc[:1,:].isnull()).sum(axis=1)

0    141
dtype: int64

In [130]:
stud_ach.head()

Unnamed: 0,IDCNTRY,IDBOOK,IDSCHOOL,IDCLASS,IDSTUD,MP52024,MP52058A,MP52058B,MP52125,MP52229,MP52063,MP52072,MP52146A,MP52146B,MP52092,MP52046,MP52083,MP52082,MP52161,MP52418A,MP52418B,MP72007A,MP72007B,MP72007C,MP72007D,MP72007E,MP72007,MP72025,MP72017,MP72190,MP72068,MP72076,MP72056,MP72098,MP72103,MP72121,MP72180,MP72198A,MP72198B,MP72198,MP72227,MP72170,MP72209,MP62005,MP62139,MP62164,MP62142,MP62084,MP62351,MP62223,MP62027,MP62174,MP62244,MP62261,MP62300,MP62254,MP62132A,MP62132B,MP72178,MP72234,MP72020,MP72027,MP72052A,MP72052B,MP72052,MP72067,MP72083A,MP72083B,MP72108A,MP72108B,MP72181,MP72126,MP72164A,MP72164B,MP72164C,MP72164D,MP72164E,MP72164,MP72185A,MP72185B,MP52413,MP52134,MP52078,MP52034,MP52174A,MP52174B,MP52130,MP52073,MP52110,MP52105,MP52407,MP52036,MP52502,MP52117,MP52426,MP62150,MP62335,MP62219,MP62002,MP62149,MP62241,MP62342,MP62105,MP62040,MP62288,MP62173,MP62133,MP62123A,MP62123B,MP52079,MP52204,MP52364,MP52215,MP52147,MP52067,MP52068,MP52087,MP52048,MP52039,MP52208,MP52419A,MP52419B,MP52115,MP52421,MP72002,MP72188,MP72035,MP72055,MP72222,MP72090,MP72233,MP72106A,MP72106B,MP72106C,MP72128A,MP72128B,MP72119,MP72153A,MP72153B,MP72172,MP62329,MP62151,MP62346,MP62212,MP62056,MP62317,MP62350,MP62078,MP62284,MP62245,MP62287,MP62345A,MP62345BA,MP62345BB,MP62345BC,MP62345BD,MP62345B,MP62115,MP72187,MP72022,MP72038,MP72045,MP72049,MP72069,MP72074,MP72013,MP72095A,MP72095B,MP72095,MP72109,MP72125,MP72196,MP72237,MP72232A,MP72232B,MP72232C,MP72232D,MP72232,MP72206,MP62271,MP62152,MP62215,MP62143,MP62230,MP62095,MP62076,MP62030,MP62171,MP62301,MP62194,MP62344,MP62320,MP62296,MP72001,MP72019,MP72189,MP72024,MP72043,MP72221,MP72220,MP72225A,MP72225B,MP72225,MP72110A,MP72110B,MP72150,MP72139,MP72229,MP72171,MP72211A,MP72211B,MP62001,MP62214,MP62146,MP62154,MP62067,MP62341,MP62242,MP62250A,MP62250B,MP62170,MP62192,MP62072,MP62048A,MP62048B,MP62048C,MP62048,MP62120,MP72005,MP72021,MP72026,MP72041A,MP72041B,MP72223,MP72094,MP72059,MP72080,MP72081,MP72140A,MP72140B,MP72140C,MP72140D,MP72140E,MP72140F,MP72140,MP72120,MP72131,MP72147,MP72154,MP72192,MP72161,SP52006,SP52069,SP52012,SP52021,SP52095A,SP52095B,SP52095C,SP52095D,SP52095Z,SP52134,SP52054,SP52150,SP52243A,SP52243B,SP52243C,SP52206,SP52112A,SP52112B,SP52294,SP72072,SP72029,SP72902,SP72077,SP72900A,SP72900B,SP72103,SP72110,SP72130A,SP72130B,SP72130C,SP72130,SP72148,SP72200,SP72232A,SP72232B,SP72232C,SP72232D,SP72232E,SP72232F,SP72232,SP72275,SP72244,SP72301,SP72721,SP72335,SP62055,SP62007,SP62275,SP62225,SP62111,SP62116A,SP62116B,SP62116C,SP62262,SP62035,SP62144,SP62162,SP62233,SP62272,SP62171,SP72002,SP72403,SP72021,SP72082,SP72066,SP72063,SP72102,SP72141A,SP72141B,SP72921,SP72234,SP72251,SP72284,SP72345A,SP72345B,SP72345C,SP72345D,SP72345E,SP72345F,SP72345G,SP72345,SP72349,SP72363,SP52076,SP52272,SP52085A,SP52085B,SP52094,SP52248,SP52146,SP52282,SP52299,SP52144,SP52214,SP52221,SP52101,SP52113,SP52107,SP62090,SP62274,SP62284,SP62098A,SP62098B,SP62032,SP62043,SP62158,SP62159,SP62005,SP62075,SP62004,SP62175,SP62173AA,SP62173AB,SP62173AC,SP62173AD,SP62173A,SP62173B,SP52090A,SP52090B,SP52262,SP52267,SP52273,SP52015A,SP52015B,SP52015C,SP52015D,SP52015E,SP52015F,SP52015Z,SP52051,SP52026,SP52130,SP52028,SP52189,SP52217,SP52038,SP52099,SP52118,SP72070,SP72400,SP72024,SP72462,SP72443,SP72903,SP72145,SP72100,SP72133,SP72137,SP72298,SP72215,SP72260,SP72265A,SP72265B,SP72265C,SP72265D,SP72265E,SP72265,SP72347,SP72351,SP72367,SP62099,SP62095,SP62106,SP62064,SP62132,SP62163,SP62153,SP62018A,SP62018B,SP62018C,SP62018D,SP62018E,SP62018,SP62143,SP62276,SP62050,SP62205,SP62190,SP62024A,SP62024B,SP72033,SP72440,SP72032,SP72031,SP72086,SP72005,SP72048,SP72123,SP72116,SP72920,SP72294,SP72231,SP72261A,SP72261B,SP72261C,SP72261D,SP72261E,SP72261,SP72220,SP72348,SP72720,SP62279,SP62112,SP62119,SP62093,SP62089,SP62006,SP62067,SP62247,SP62177,SP62186,SP62211A,SP62211B,SP62036,SP62033,SP62037,SP62242A,SP62242B,SP62242C,SP62242D,SP62242E,SP62242,SP72078,SP72460,SP72000,SP72906A,SP72906B,SP72906C,SP72906D,SP72906E,SP72906,SP72901,SP72038,SP72120,SP72143,SP72523,SP72168,SP72205,SP72293,SP72280A,SP72280B,SP72370,SP72329,SP62091A,SP62091B,SP62100,SP62097,SP62101,SP62266,SP62128,SP62047A,SP62047B,SP62047C,SP62047,SP62042,SP62250,SP62246,SP62056,SP62235,SP62180,SP62022A,SP62022B,SP62022C,SP62022D,SP62022,SP62243,SP72011,SP72905,SP72049,SP72016A,SP72016B,SP72016,SP72451,SP72074,SP72091,SP72109,SP72140,SP72132,SP72209,SP72210,SP72249,SP72323,SP72368,SP72303,ME52024,ME52058A,ME52058B,ME52125,ME52229,ME52063,ME52072,ME52146A,ME52146B,ME52092,ME52046,ME52083,ME52082,ME52161,ME52418A,ME52418B,ME72007A,ME72007B,ME72007C,ME72007D,ME72007E,ME72007,ME72025,ME72017,ME72190,ME72068,ME72076,ME72056,ME72098,ME72103,ME72121,ME72180A,ME72180B,ME72180C,ME72180,ME72198A,ME72198B,ME72198,ME72227,ME72170A,ME72170B,ME72170C,ME72170,ME72209,ME62005,ME62139,ME62164,ME62142,ME62084,ME62351,ME62223,ME62027,ME62174,ME62244A,ME62244B,ME62244,ME62261,ME62300,ME62254,ME62132A,ME62132B,ME72178A,ME72178B,ME72178C,ME72178D,ME72178E,ME72178,ME72234,ME72020A,ME72020B,ME72020C,ME72020D,ME72020,ME72027,ME72052A,ME72052B,ME72052,ME72067,ME72083A,ME72083B,ME72108A,ME72108B,ME72181,ME72126,ME72164A,ME72164B,ME72164C,ME72164D,ME72164E,ME72164,ME72185A,ME72185B,ME52413,ME52134,ME52078,ME52034,ME52174A,ME52174B,ME52130,ME52073,ME52110,ME52105,ME52407,ME52036,ME52502A,ME52502B,ME52502C,ME52502D,ME52502,ME52117,ME52426,ME62150,ME62335,ME62219,ME62002,ME62149,ME62241,ME62342,ME62105,ME62040,ME62288A,ME62288B,ME62288,ME62173,ME62133,ME62123A,ME62123B,ME52079,ME52204,ME52364,ME52215,ME52147,ME52067,ME52068,ME52087A,ME52087B,ME52087,ME52048,ME52039,ME52208,ME52419A,ME52419B,ME52115,ME52421,ME72002,ME72188,ME72035,ME72055A,ME72055B,ME72055C,ME72055D,ME72055E,ME72055F,ME72055,ME72222,ME72090,ME72233,ME72106A,ME72106B,ME72106C,ME72128A,ME72128B,ME72119,ME72153A,ME72153B,ME72172,ME62329,ME62151,ME62346,ME62212,ME62056,ME62317A,ME62317B,ME62317C,ME62317,ME62350,ME62078,ME62284,ME62245,ME62287,ME62345AA,ME62345AB,ME62345AC,ME62345AD,ME62345A,ME62345BA,ME62345BB,ME62345BC,ME62345BD,ME62345B,ME62115,ME72187,ME72022,ME72038,ME72045,ME72049,ME72069,ME72074,ME72013,ME72095A,ME72095B,ME72095,ME72109,ME72125,ME72196,ME72237,ME72232A,ME72232B,ME72232C,ME72232D,ME72232,ME72206,ME62271,ME62152,ME62215A,ME62215B,ME62215,ME62143,ME62230,ME62095,ME62076,ME62030,ME62171,ME62301,ME62194,ME62344,ME62320,ME62296,ME72001,ME72019,ME72189,ME72024,ME72043,ME72221,ME72220,ME72225A,ME72225B,ME72225,ME72110A,ME72110B,ME72150,ME72139,ME72229,ME72171,ME72211A,ME72211B,ME62001,ME62214,ME62146,ME62154,ME62067,ME62341,ME62242,ME62250A,ME62250B,ME62170A,ME62170B,ME62170,ME62192,ME62072,ME62048A,ME62048B,ME62048C,ME62048,ME62120,ME72005,ME72021,ME72026,ME72041A,ME72041B,ME72223,ME72094,ME72059,ME72080,ME72081A,ME72081B,ME72081C,ME72081D,ME72081,ME72140A,ME72140B,ME72140C,ME72140D,ME72140E,ME72140F,ME72140,ME72120,ME72131,ME72147,ME72154,ME72192,ME72161,SE52006,SE52069,SE52012,SE52021,SE52095B,SE52095C,SE52095D,SE52095Z,SE52134,SE52054,SE52150,SE52243A,SE52243B,SE52243C,SE52206,SE52112A,SE52112B,SE52294,SE72072,SE72029,SE72902,SE72077,SE72900A,SE72900B,SE72103,SE72110,SE72130A,SE72130B,SE72130C,SE72130,SE72148,SE72200,SE72232A,SE72232B,SE72232C,SE72232D,SE72232E,SE72232F,SE72232,SE72275,SE72244,SE72301,SE72721,SE72335,SE62055,SE62007,SE62275,SE62225,SE62111,SE62116A,SE62116B,SE62116C,SE62262,SE62035,SE62144,SE62162,SE62233,SE62272,SE62171,SE72002,SE72403A,SE72403B,SE72403C,SE72403D,SE72403,SE72021,SE72082,SE72066,SE72063,SE72102,SE72141A,SE72141B,SE72921,SE72234,SE72251,SE72284,SE72345A,SE72345B,SE72345C,SE72345D,SE72345E,SE72345F,SE72345G,SE72345,SE72349,SE72363,SE52076,SE52272,SE52085A,SE52085B,SE52094,SE52248,SE52146,SE52282,SE52299,SE52144,SE52214,SE52221,SE52101,SE52113,SE52107,SE62090,SE62274,SE62284,SE62098A,SE62098B,SE62032,SE62043,SE62158,SE62159,SE62005,SE62075,SE62004,SE62175,SE62173AA,SE62173AB,SE62173AC,SE62173AD,SE62173A,SE62173B,SE52090A,SE52090B,SE52262,SE52267,SE52273,SE52015A,SE52015B,SE52015C,SE52015D,SE52015E,SE52015F,SE52015Z,SE52051,SE52026,SE52130,SE52028,SE52189,SE52217,SE52038,SE52099,SE52118,SE72070,SE72400A,SE72400B,SE72400C,SE72400D,SE72400,SE72024,SE72462,SE72443,SE72903,SE72145,SE72100,SE72133,SE72137,SE72298,SE72215,SE72260A,SE72260B,SE72260C,SE72260D,SE72260E,SE72260F,SE72260G,SE72260H,SE72260,SE72265A,SE72265B,SE72265C,SE72265D,SE72265E,SE72265,SE72347,SE72351,SE72367,SE62099,SE62095,SE62106,SE62064,SE62132,SE62163,SE62153,SE62018A,SE62018B,SE62018C,SE62018D,SE62018E,SE62018,SE62143,SE62276,SE62050,SE62205,SE62190,SE62024A,SE62024B,SE72033A,SE72033B,SE72033C,SE72033D,SE72033E,SE72033,SE72440,SE72032,SE72031,SE72086A,SE72086B,SE72086C,SE72086D,SE72086,SE72005,SE72048,SE72123,SE72116,SE72920,SE72294,SE72231,SE72261A,SE72261B,SE72261C,SE72261D,SE72261E,SE72261,SE72220,SE72348,SE72720,SE62279,SE62112,SE62119,SE62093,SE62089,SE62006A,SE62006B,SE62006C,SE62006,SE62067,SE62247,SE62177,SE62186,SE62211A,SE62211B,SE62036,SE62033,SE62037,SE62242A,SE62242B,SE62242C,SE62242D,SE62242E,SE62242,SE72078,SE72460,SE72000A,SE72000B,SE72000C,SE72000D,SE72000E,SE72000,SE72906A,SE72906B,SE72906C,SE72906D,SE72906E,SE72906,SE72901,SE72038,SE72120,SE72143A,SE72143B,SE72143C,SE72143D,SE72143,SE72523,SE72168,SE72205,SE72293,SE72280A,SE72280B,SE72370,SE72329,SE62091A,SE62091B,SE62100,SE62097,SE62101A,SE62101B,SE62101C,SE62101D,SE62101,SE62266,SE62128,SE62047A,SE62047B,SE62047C,SE62047,SE62042A,SE62042B,SE62042C,SE62042D,SE62042,SE62250,SE62246,SE62056,SE62235,SE62180,SE62022A,SE62022B,SE62022C,SE62022D,SE62022,SE62243A,SE62243B,SE62243C,SE62243D,SE62243,SE72011,SE72905A,SE72905B,SE72905C,SE72905D,SE72905,SE72049,SE72016A,SE72016B,SE72016,SE72451,SE72074,SE72091,SE72109,SE72140,SE72132,SE72209,SE72210,SE72249,SE72323,SE72368,SE72303,BNRGCAL1,BNRGCAL2,IDPOP,IDGRADER,IDGRADE,ITLANG_SA,LCID_SA,ITSEX,BSDAGE,ITADMINI,ILRELIAB,TOTWGT,HOUWGT,SENWGT,WGTADJ1,WGTADJ2,WGTADJ3,WGTFAC1,WGTFAC2,WGTFAC3,JKREP,JKZONE,BSMIBM01,BSMIBM02,BSMIBM03,BSMIBM04,BSMIBM05,BSSIBM01,BSSIBM02,BSSIBM03,BSSIBM04,BSSIBM05,Part1TimeFlag,Part2TimeFlag,VERSION,SCOPE,mmat_avg,mmat_sd,ssci_avg,ssci_sd,malg_avg,malg_sd,mapp_avg,mapp_sd,mdat_avg,mdat_sd,mgeo_avg,mgeo_sd,mkno_avg,mkno_sd,mnum_avg,mnum_sd,mrea_avg,mrea_sd,sapp_avg,sapp_sd,sbio_avg,sbio_sd,sche_avg,sche_sd,sear_avg,sear_sd,skno_avg,skno_sd,sphy_avg,sphy_sd,srea_avg,srea_sd,math_freq_avg,math_freq_sd,sci_freq_avg,sci_freq_sd,math_time_avg,math_time_sd,sci_time_avg,sci_time_sd
0,7842.0,Booklet 11,5001.0,500102.0,50010201.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,D*,CORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,B*,C,A*,CORRECT RESPONSE,D*,INCORRECT RESPONSE,C*,INCORRECT RESPONSE,D*,,CORRECT RESPONSE,CORRECT RESPONSE,B*,CORRECT RESPONSE,C*,B*,D*,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,B,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,B*,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,C,INCORRECT RESPONSE,C*,,B,CORRECT RESPONSE,CORRECT RESPONSE,CORRECT RESPONSE,CORRECT RESPONSE,INCORRECT RESPONSE,B*,B,C,,,,,A*,,,,,,,,A*,E*,F,,A,C,INCORRECT RESPONSE,,,,,,,,A*,B*,A*,C*,D*,B*,CORRECT RESPONSE,CORRECT RESPONSE,D*,A,,,,C*,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.58,Teacher from School,Students Booklet was used for Reliability Scoring,1.8,0.625133,0.038099,1.0,1.0,1.066667,1.0,1.5,1.125,1.0,31.0,At or above 475 but below 550,At or above 475 but below 550,At or above 550 but below 625,At or above 550 but below 625,At or above 550 but below 625,At or above 475 but below 550,At or above 400 but below 475,At or above 400 but below 475,At or above 475 but below 550,At or above 475 but below 550,Time is plausible,Time is plausible,4.0,Public Use File (PUF),550.550842,14.177866,480.099946,26.598791,517.042408,28.358961,502.83618,27.430262,485.677994,10.297154,508.16895,32.114046,527.618686,23.479411,535.162198,21.271499,517.203196,13.943475,459.227332,24.484878,404.517518,42.645838,443.350576,55.498128,412.148076,46.859601,471.580088,22.08194,450.679538,42.528149,464.316504,18.936576,3.357143,0.95119,1.2,0.484234,59.701357,56.396576,35.49737,56.348488
1,7842.0,Booklet 12,5001.0,500102.0,50010202.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,INCORRECT RESPONSE,INCORRECT RESPONSE,B*,INCORRECT RESPONSE,B,C,A,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,B,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,B*,,B*,INCORRECT RESPONSE,C,INCORRECT RESPONSE,B,C,B,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,,,,,B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,INCORRECT RESPONSE,B,E*,B,F,,A,INCORRECT RESPONSE,,,,,,,C,B,C,B,A,D*,E,INCORRECT RESPONSE,INCORRECT RESPONSE,C,C*,INCORRECT RESPONSE,INCORRECT RESPONSE,C,C*,,C,B,INCORRECT RESPONSE,C,D*,B*,A,C,PARTIALLY CORRECT RESPONSE,,CORRECT RESPONSE,B,A,B,INCORRECT RESPONSE,B,A,B*,A*,INCORRECT RESPONSE,INCORRECT RESPONSE,B,INCORRECT RESPONSE,B,A*,B,A*,B,B*,INCORRECT RESPONSE,C*,B,A*,D,PARTIALLY CORRECT RESPONSE,,,,,,,,,,,,,,,,,,,,,,,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.75,Teacher from School,Students Booklet was used for Reliability Scoring,1.8,0.625133,0.038099,1.0,1.0,1.066667,1.0,1.5,1.125,1.0,31.0,Below 400,Below 400,Below 400,Below 400,Below 400,Below 400,Below 400,Below 400,Below 400,Below 400,Time is plausible,Time is plausible,4.0,Public Use File (PUF),326.129702,37.533124,318.662552,50.233715,323.233832,56.013071,356.545632,32.103558,302.030198,47.510665,328.756934,60.146536,352.864178,18.870706,355.017696,53.087467,385.102162,33.703481,315.159308,18.44834,295.50934,37.752626,324.683132,31.573676,343.312374,31.913862,323.550294,17.371524,343.94273,45.624659,315.27428,17.844425,1.074074,0.26688,3.533333,1.382984,12.928846,11.478432,81.199519,131.941881
2,7842.0,Booklet 13,5001.0,500102.0,50010203.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,C,INCORRECT RESPONSE,C,INCORRECT RESPONSE,D,D*,A*,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,CORRECT RESPONSE,,,,,,C*,C*,CORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,A,INCORRECT RESPONSE,INCORRECT RESPONSE,C,A*,B*,B,B*,INCORRECT RESPONSE,A,B,B*,B,B*,B,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,A,C,INCORRECT RESPONSE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,A,,CORRECT RESPONSE,A*,A,C,B,D,INCORRECT RESPONSE,,INCORRECT RESPONSE,B,A,A*,INCORRECT RESPONSE,A*,B*,A,B,INCORRECT RESPONSE,INCORRECT RESPONSE,A,INCORRECT RESPONSE,D,D,A*,B,A*,B*,INCORRECT RESPONSE,A,B,C,D,INCORRECT RESPONSE,B,B*,C,F,D,INCORRECT RESPONSE,A,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,D,INCORRECT RESPONSE,D,INCORRECT RESPONSE,A*,INCORRECT RESPONSE,A,C,D,D*,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.5,Teacher from School,Students Booklet was used for Reliability Scoring,1.8,0.625133,0.038099,1.0,1.0,1.066667,1.0,1.5,1.125,1.0,31.0,At or above 400 but below 475,Below 400,At or above 400 but below 475,Below 400,Below 400,Below 400,Below 400,Below 400,Below 400,Below 400,Time is plausible,Time is plausible,4.0,Public Use File (PUF),393.169444,38.411535,328.771868,35.715264,420.589982,39.893618,389.112222,46.014018,371.349082,21.243562,391.344424,19.908828,417.370708,36.344379,386.164962,11.464428,407.7775,37.342451,310.335522,38.33154,279.297756,25.64207,321.583934,35.663924,291.039122,45.014511,313.976578,41.869908,350.246454,37.317504,315.896516,35.867133,1.344828,0.768852,1.16129,0.454369,30.976286,61.719758,25.526767,39.224413
3,7842.0,Booklet 14,5001.0,500102.0,50010204.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,A,INCORRECT RESPONSE,INCORRECT RESPONSE,D*,INCORRECT RESPONSE,C*,B*,CORRECT RESPONSE,,C,D*,B,C*,D,,C,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,D,CORRECT RESPONSE,,CORRECT RESPONSE,CORRECT RESPONSE,B*,CORRECT RESPONSE,CORRECT RESPONSE,A*,B,B*,B,A,INCORRECT RESPONSE,B*,B,B*,A*,B*,B,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,D,C,INCORRECT RESPONSE,INCORRECT RESPONSE,A,A,INCORRECT RESPONSE,B*,A,B,INCORRECT RESPONSE,,INCORRECT RESPONSE,A,INCORRECT RESPONSE,INCORRECT RESPONSE,A,B,B,INCORRECT RESPONSE,B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,B,D,A,F,E,INCORRECT RESPONSE,B*,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,CORRECT RESPONSE,INCORRECT RESPONSE,D,CORRECT RESPONSE,A,INCORRECT RESPONSE,D,INCORRECT RESPONSE,D*,C,C*,C,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,14.0,Teacher from School,Students Booklet was used for Reliability Scoring,1.8,0.625133,0.038099,1.0,1.0,1.066667,1.0,1.5,1.125,1.0,31.0,At or above 475 but below 550,At or above 400 but below 475,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,Below 400,Below 400,Below 400,Below 400,At or above 400 but below 475,Time is plausible,Time is plausible,4.0,Public Use File (PUF),489.828648,32.961367,378.232484,47.503369,543.708308,38.052017,491.817772,39.164271,419.338646,48.523008,488.532332,35.062213,497.379814,19.371861,484.441178,37.49513,511.295352,38.939883,375.49062,46.444579,361.176526,37.927105,408.553054,32.142967,347.512598,37.19258,354.133282,54.434857,387.104264,31.146016,363.146528,43.920491,1.37931,1.177582,3.892857,0.737327,56.092828,108.735269,80.492148,148.241731
4,7842.0,Booklet 02,5001.0,500102.0,50010207.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,B*,A*,A*,B*,A*,CORRECT RESPONSE,D,INCORRECT RESPONSE,CORRECT RESPONSE,B,A*,CORRECT RESPONSE,CORRECT RESPONSE,D*,CORRECT RESPONSE,A*,B*,C*,CORRECT RESPONSE,CORRECT RESPONSE,CORRECT RESPONSE,CORRECT RESPONSE,CORRECT RESPONSE,D,A*,C,INCORRECT RESPONSE,INCORRECT RESPONSE,C*,INCORRECT RESPONSE,C,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,D*,B*,CORRECT RESPONSE,B,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,INCORRECT RESPONSE,,A*,,INCORRECT RESPONSE,B,A*,A*,A,B*,A*,B*,B*,INCORRECT RESPONSE,C,INCORRECT RESPONSE,D,A*,B,D,A,INCORRECT RESPONSE,B,INCORRECT RESPONSE,INCORRECT RESPONSE,,,,D,A,INCORRECT RESPONSE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.33,Teacher from School,Students Booklet not used for Reliability Scoring,1.8,0.625133,0.038099,1.0,1.0,1.066667,1.0,1.5,1.125,1.0,31.0,At or above 400 but below 475,At or above 475 but below 550,At or above 400 but below 475,At or above 475 but below 550,At or above 400 but below 475,Below 400,Below 400,Below 400,At or above 400 but below 475,At or above 400 but below 475,Time is plausible,Time is plausible,4.0,Public Use File (PUF),461.342258,34.115325,391.614724,43.326218,504.907092,39.674531,441.28496,20.676872,428.578838,13.871167,472.745316,22.406288,490.44766,12.335686,496.971478,15.914752,488.248834,15.381402,418.494234,11.175302,383.619244,27.533919,381.390132,58.282169,391.778264,64.325072,413.508398,13.317012,402.541082,54.49612,411.977244,30.350415,1.0,0.0,6.482759,1.863574,36.396724,90.735882,74.784429,131.154615


In [21]:
school.shape

(9925, 98)

In [22]:
stud_ach.shape

(295453, 2073)

In [23]:
student.shape

(295453, 455)

In [24]:
stud_teach.shape

(775679, 115)

In [25]:
math_teach.shape

(14571, 158)

In [26]:
sci_teach.shape

(22648, 169)

In [29]:
school.to_csv('data/school.csv', index = False)

In [88]:
stud_ach.to_csv('data/stud_ach.csv', index = False)

AttributeError: 'DataFrame' object has no attribute 'name'

In [32]:
student.to_csv('data/student.csv', index = False)
stud_teach.to_csv('data/stud_teach.csv', index = False)
math_teach.to_csv('data/math_teach.csv', index = False)
sci_teach.to_csv('data/sci_teach.csv', index = False)

In [None]:
school = pd.read_csv('data/school.csv')

In [215]:
stud_ach = pd.read_csv('data/stud_ach.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [255]:
student = pd.read_csv('data/student.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [225]:
stud_teach = pd.read_csv('data/stud_teach.csv')

In [None]:
math_teach = pd.read_csv('data/math_teach.csv')

In [None]:
sci_teach = pd.read_csv('data/sci_teach.csv')

In [33]:
np.array([-1,0,1,-1,0,1]) & np.array([-1, 1, 0, 1, 0, 1])

array([-1,  0,  0,  1,  0,  1], dtype=int32)

In [207]:
def binary_scores(answer): 
    try: 
        float(answer)
        return np.nan
    except:
        if '*' in answer or ('CORRECT' in answer and 'INCORRECT' not in answer):
            return 1
        elif '*' not in answer or ('INCORRECT' in answer):
            return 0
        else:
            return np.nan

In [208]:
def nanor(t):
    t = list(filter(lambda x: str(x) != 'nan', t))
    if len(t) == 0:
        return np.nan
    else:
        return int(any(t))
    
def nanand(t):
    t = list(filter(lambda x: str(x) != 'nan', t))
    if len(t) == 0:
        return np.nan
    else:
        return int(all(t))

In [209]:
my_answer_list = ['A','D*','INCORRECT RESPONSE','CORRECT RESPONSE', np.nan]
print([binary_scores(ans) for ans in my_answer_list])

[0, 1, 0, 1, nan]


In [210]:
stud_ach['MP52024'].unique(), stud_ach['MP52024'].isnull().sum(), stud_ach['ME52024'].unique(), stud_ach['ME52024'].isnull().sum()

(array([nan, 'A', 'B*', 'D', 'C'], dtype=object),
 270445,
 array([nan, 'A', 'B*', 'D', 'C'], dtype=object),
 275885)

In [205]:
stud_ach.shape

(295453, 1239)

In [218]:
paper_cols_all = [col1 for col1 in list(stud_ach.columns) if (col1[1] == 'P' and col1[-4:] != '_val')]
for col1 in paper_cols_all:
    paper_cols = [str(col2) for col2 in list(stud_ach.columns) if (col1[:7] in col2 and col2[-4:] != '_val')]
    elec_cols = [col3 for col3 in list(stud_ach.columns) if (col1[2:7] in col3 and col1[0] == col3[0] and col3[1] == 'E')]
    # print(col1, paper_cols + elec_cols)
    for col in paper_cols+elec_cols:
        stud_ach[col] = stud_ach[col].apply(binary_scores)
    if len(paper_cols) > 0:
        stud_ach[paper_cols[0]] = stud_ach.loc[:,paper_cols].apply(nanand, axis=1)
        stud_ach[elec_cols[0]] = stud_ach.loc[:,elec_cols].apply(nanand, axis=1)
        stud_ach[col1[:7]+'_val'] = stud_ach.loc[:,[paper_cols[0],elec_cols[0]]].apply(nanor, axis=1)
        stud_ach.drop(columns=paper_cols+elec_cols, inplace=True)

MP52092 ['MP52092', 'ME52092']
MP52046 ['MP52046', 'ME52046']
MP52083 ['MP52083', 'ME52083']
MP52082 ['MP52082', 'ME52082']
MP52161 ['MP52161', 'ME52161']
MP52418A ['MP52418A', 'MP52418B', 'ME52418A', 'ME52418B']
MP52418B []
MP72007A ['MP72007A', 'MP72007B', 'MP72007C', 'MP72007D', 'MP72007E', 'MP72007', 'ME72007A', 'ME72007B', 'ME72007C', 'ME72007D', 'ME72007E', 'ME72007']
MP72007B []
MP72007C []
MP72007D []
MP72007E []
MP72007 []
MP72025 ['MP72025', 'ME72025']
MP72017 ['MP72017', 'ME72017']
MP72190 ['MP72190', 'ME72190']
MP72068 ['MP72068', 'ME72068']
MP72076 ['MP72076', 'ME72076']
MP72056 ['MP72056', 'ME72056']
MP72098 ['MP72098', 'ME72098']
MP72103 ['MP72103', 'ME72103']
MP72121 ['MP72121', 'ME72121']
MP72180 ['MP72180', 'ME72180A', 'ME72180B', 'ME72180C', 'ME72180']
MP72198A ['MP72198A', 'MP72198B', 'MP72198', 'ME72198A', 'ME72198B', 'ME72198']
MP72198B []
MP72198 []
MP72227 ['MP72227', 'ME72227']
MP72170 ['MP72170', 'ME72170A', 'ME72170B', 'ME72170C', 'ME72170']
MP72209 ['MP72209

In [219]:
stud_ach.shape

(295453, 478)

In [223]:
stud_ach.to_csv('data/stud_ach.csv', index = False)

In [221]:
stud_ach[~stud_ach.loc[:,'MP72055_val'].isnull()].loc[:,'MP72055_val']

9         0.0
10        0.0
23        0.0
24        0.0
33        0.0
         ... 
295426    0.0
295427    0.0
295438    1.0
295439    1.0
295452    0.0
Name: MP72055_val, Length: 36426, dtype: float64

In [222]:
stud_ach.iloc[100:109,:]

Unnamed: 0,IDCNTRY,IDBOOK,IDSCHOOL,IDCLASS,IDSTUD,BNRGCAL1,BNRGCAL2,IDPOP,IDGRADER,IDGRADE,ITLANG_SA,LCID_SA,ITSEX,BSDAGE,ITADMINI,ILRELIAB,TOTWGT,HOUWGT,SENWGT,WGTADJ1,WGTADJ2,WGTADJ3,WGTFAC1,WGTFAC2,WGTFAC3,JKREP,JKZONE,BSMIBM01,BSMIBM02,BSMIBM03,BSMIBM04,BSMIBM05,BSSIBM01,BSSIBM02,BSSIBM03,BSSIBM04,BSSIBM05,Part1TimeFlag,Part2TimeFlag,VERSION,SCOPE,mmat_avg,mmat_sd,ssci_avg,ssci_sd,malg_avg,malg_sd,mapp_avg,mapp_sd,mdat_avg,mdat_sd,mgeo_avg,mgeo_sd,mkno_avg,mkno_sd,mnum_avg,mnum_sd,mrea_avg,mrea_sd,sapp_avg,sapp_sd,sbio_avg,sbio_sd,sche_avg,sche_sd,sear_avg,sear_sd,skno_avg,skno_sd,sphy_avg,sphy_sd,srea_avg,srea_sd,math_freq_avg,math_freq_sd,sci_freq_avg,sci_freq_sd,math_time_avg,math_time_sd,sci_time_avg,sci_time_sd,MP52024_val,MP52058_val,MP52125_val,MP52229_val,MP52063_val,MP52072_val,MP52146_val,MP52092_val,MP52046_val,MP52083_val,MP52082_val,MP52161_val,MP52418_val,MP72007_val,MP72025_val,MP72017_val,MP72190_val,MP72068_val,MP72076_val,MP72056_val,MP72098_val,MP72103_val,MP72121_val,MP72180_val,MP72198_val,MP72227_val,MP72170_val,MP72209_val,MP62005_val,MP62139_val,MP62164_val,MP62142_val,MP62084_val,MP62351_val,MP62223_val,MP62027_val,MP62174_val,MP62244_val,MP62261_val,MP62300_val,MP62254_val,MP62132_val,MP72178_val,MP72234_val,MP72020_val,MP72027_val,MP72052_val,MP72067_val,MP72083_val,MP72108_val,MP72181_val,MP72126_val,MP72164_val,MP72185_val,MP52413_val,MP52134_val,MP52078_val,MP52034_val,MP52174_val,MP52130_val,MP52073_val,MP52110_val,MP52105_val,MP52407_val,MP52036_val,MP52502_val,MP52117_val,MP52426_val,MP62150_val,MP62335_val,MP62219_val,MP62002_val,MP62149_val,MP62241_val,MP62342_val,MP62105_val,MP62040_val,MP62288_val,MP62173_val,MP62133_val,MP62123_val,MP52079_val,MP52204_val,MP52364_val,MP52215_val,MP52147_val,MP52067_val,MP52068_val,MP52087_val,MP52048_val,MP52039_val,MP52208_val,MP52419_val,MP52115_val,MP52421_val,MP72002_val,MP72188_val,MP72035_val,MP72055_val,MP72222_val,MP72090_val,MP72233_val,MP72106_val,MP72128_val,MP72119_val,MP72153_val,MP72172_val,MP62329_val,MP62151_val,MP62346_val,MP62212_val,MP62056_val,MP62317_val,MP62350_val,MP62078_val,MP62284_val,MP62245_val,MP62287_val,MP62345_val,MP62115_val,MP72187_val,MP72022_val,MP72038_val,MP72045_val,MP72049_val,MP72069_val,MP72074_val,MP72013_val,MP72095_val,MP72109_val,MP72125_val,MP72196_val,MP72237_val,MP72232_val,MP72206_val,MP62271_val,MP62152_val,MP62215_val,MP62143_val,MP62230_val,MP62095_val,MP62076_val,MP62030_val,MP62171_val,MP62301_val,MP62194_val,MP62344_val,MP62320_val,MP62296_val,MP72001_val,MP72019_val,MP72189_val,MP72024_val,MP72043_val,MP72221_val,MP72220_val,MP72225_val,MP72110_val,MP72150_val,MP72139_val,MP72229_val,MP72171_val,MP72211_val,MP62001_val,MP62214_val,MP62146_val,MP62154_val,MP62067_val,MP62341_val,MP62242_val,MP62250_val,MP62170_val,MP62192_val,MP62072_val,MP62048_val,MP62120_val,MP72005_val,MP72021_val,MP72026_val,MP72041_val,MP72223_val,MP72094_val,MP72059_val,MP72080_val,MP72081_val,MP72140_val,MP72120_val,MP72131_val,MP72147_val,MP72154_val,MP72192_val,MP72161_val,SP52006_val,SP52069_val,SP52012_val,SP52021_val,SP52095_val,SP52134_val,SP52054_val,SP52150_val,SP52243_val,SP52206_val,SP52112_val,SP52294_val,SP72072_val,SP72029_val,SP72902_val,SP72077_val,SP72900_val,SP72103_val,SP72110_val,SP72130_val,SP72148_val,SP72200_val,SP72232_val,SP72275_val,SP72244_val,SP72301_val,SP72721_val,SP72335_val,SP62055_val,SP62007_val,SP62275_val,SP62225_val,SP62111_val,SP62116_val,SP62262_val,SP62035_val,SP62144_val,SP62162_val,SP62233_val,SP62272_val,SP62171_val,SP72002_val,SP72403_val,SP72021_val,SP72082_val,SP72066_val,SP72063_val,SP72102_val,SP72141_val,SP72921_val,SP72234_val,SP72251_val,SP72284_val,SP72345_val,SP72349_val,SP72363_val,SP52076_val,SP52272_val,SP52085_val,SP52094_val,SP52248_val,SP52146_val,SP52282_val,SP52299_val,SP52144_val,SP52214_val,SP52221_val,SP52101_val,SP52113_val,SP52107_val,SP62090_val,SP62274_val,SP62284_val,SP62098_val,SP62032_val,SP62043_val,SP62158_val,SP62159_val,SP62005_val,SP62075_val,SP62004_val,SP62175_val,SP62173_val,SP52090_val,SP52262_val,SP52267_val,SP52273_val,SP52015_val,SP52051_val,SP52026_val,SP52130_val,SP52028_val,SP52189_val,SP52217_val,SP52038_val,SP52099_val,SP52118_val,SP72070_val,SP72400_val,SP72024_val,SP72462_val,SP72443_val,SP72903_val,SP72145_val,SP72100_val,SP72133_val,SP72137_val,SP72298_val,SP72215_val,SP72260_val,SP72265_val,SP72347_val,SP72351_val,SP72367_val,SP62099_val,SP62095_val,SP62106_val,SP62064_val,SP62132_val,SP62163_val,SP62153_val,SP62018_val,SP62143_val,SP62276_val,SP62050_val,SP62205_val,SP62190_val,SP62024_val,SP72033_val,SP72440_val,SP72032_val,SP72031_val,SP72086_val,SP72005_val,SP72048_val,SP72123_val,SP72116_val,SP72920_val,SP72294_val,SP72231_val,SP72261_val,SP72220_val,SP72348_val,SP72720_val,SP62279_val,SP62112_val,SP62119_val,SP62093_val,SP62089_val,SP62006_val,SP62067_val,SP62247_val,SP62177_val,SP62186_val,SP62211_val,SP62036_val,SP62033_val,SP62037_val,SP62242_val,SP72078_val,SP72460_val,SP72000_val,SP72906_val,SP72901_val,SP72038_val,SP72120_val,SP72143_val,SP72523_val,SP72168_val,SP72205_val,SP72293_val,SP72280_val,SP72370_val,SP72329_val,SP62091_val,SP62100_val,SP62097_val,SP62101_val,SP62266_val,SP62128_val,SP62047_val,SP62042_val,SP62250_val,SP62246_val,SP62056_val,SP62235_val,SP62180_val,SP62022_val,SP62243_val,SP72011_val,SP72905_val,SP72049_val,SP72016_val,SP72451_val,SP72074_val,SP72091_val,SP72109_val,SP72140_val,SP72132_val,SP72209_val,SP72210_val,SP72249_val,SP72323_val,SP72368_val,SP72303_val
100,7842.0,Booklet 05,5006.0,500604.0,50060421.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.25,Teacher from School,Students Booklet was used for Reliability Scoring,7.2,2.500531,0.152397,1.0,1.0,1.08,1.0,6.0,1.111111,0.0,51.0,At or above 400 but below 475,At or above 400 but below 475,Below 400,Below 400,At or above 475 but below 550,At or above 400 but below 475,At or above 475 but below 550,At or above 400 but below 475,At or above 400 but below 475,At or above 475 but below 550,Time is plausible,Time is plausible,4.0,Public Use File (PUF),419.744778,40.768982,453.977872,42.739573,401.568946,20.650783,406.572178,35.085744,401.29624,28.862584,382.916592,35.780805,400.984936,29.115184,412.03545,29.94232,407.407128,33.460751,465.629534,37.646764,480.884784,26.171716,485.478644,27.845427,481.894816,16.623368,481.881068,41.054293,481.771132,9.74557,441.962772,42.049439,1.074074,0.26688,3.111111,0.423659,99.397192,103.029586,80.614615,57.06885,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
101,7842.0,Booklet 06,5006.0,500604.0,50060422.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.92,Teacher from School,Students Booklet was used for Reliability Scoring,7.2,2.500531,0.152397,1.0,1.0,1.08,1.0,6.0,1.111111,0.0,51.0,At or above 475 but below 550,At or above 550 but below 625,At or above 550 but below 625,At or above 475 but below 550,At or above 550 but below 625,At or above 550 but below 625,At or above 550 but below 625,At or above 550 but below 625,At or above 550 but below 625,At or above 550 but below 625,Time is plausible,Time is plausible,4.0,Public Use File (PUF),548.065842,27.390571,590.337854,27.968731,555.840142,17.0276,536.1459,27.293638,494.596278,37.457093,524.219332,20.19677,553.216864,27.482129,554.009348,20.414081,541.76398,25.130121,616.968904,17.741298,616.171444,20.798031,574.646848,29.215019,598.800602,18.155437,613.016408,29.374643,572.46116,12.634976,574.026296,20.636259,1.148148,0.362014,1.296296,0.608581,97.442115,75.705009,99.744556,105.306642,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,0.0,0.0,1.0,1.0,,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
102,7842.0,Booklet 07,5006.0,500604.0,50060423.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.67,Teacher from School,Students Booklet not used for Reliability Scoring,7.2,2.500531,0.152397,1.0,1.0,1.08,1.0,6.0,1.111111,0.0,51.0,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 550 but below 625,At or above 475 but below 550,Time is plausible,Time is plausible,4.0,Public Use File (PUF),516.570894,11.712099,517.743282,26.681162,521.677458,22.997561,516.288514,13.563547,496.634756,27.657119,483.578714,36.955517,497.139098,19.938447,549.006486,31.509683,495.101452,12.467808,507.383702,12.64361,509.240542,39.680143,461.65716,34.609477,518.020762,52.672217,526.337704,8.204026,500.491992,37.333885,488.405462,17.775338,1.923077,1.016782,6.322581,2.329336,101.727538,142.372751,63.3928,51.994986,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
103,7842.0,Booklet 08,5006.0,500604.0,50060425.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.67,Teacher from School,Students Booklet was used for Reliability Scoring,7.2,2.500531,0.152397,1.0,1.0,1.08,1.0,6.0,1.111111,0.0,51.0,At or above 475 but below 550,At or above 550 but below 625,At or above 550 but below 625,At or above 550 but below 625,At or above 475 but below 550,At or above 625,At or above 625,At or above 550 but below 625,At or above 550 but below 625,At or above 550 but below 625,Time is plausible,Time is plausible,4.0,Public Use File (PUF),550.516026,26.41609,610.750914,18.778254,573.760114,53.850823,568.896868,24.834335,564.97303,25.349524,552.03848,30.709713,559.304372,23.214811,574.371544,24.076611,564.705362,22.205389,615.266822,46.337397,642.60587,62.221724,578.109356,36.364105,617.207264,39.906223,620.47843,43.561777,616.091756,45.062135,576.014012,24.18577,2.84,1.545962,1.483871,0.811212,98.395,68.981985,86.744067,101.393749,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
104,7842.0,Booklet 10,5006.0,500604.0,50060427.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.5,Teacher from School,Students Booklet was used for Reliability Scoring,7.2,2.500531,0.152397,1.0,1.0,1.08,1.0,6.0,1.111111,0.0,51.0,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 400 but below 475,At or above 475 but below 550,At or above 400 but below 475,At or above 475 but below 550,At or above 400 but below 475,At or above 475 but below 550,At or above 475 but below 550,Time is plausible,Time is plausible,4.0,Public Use File (PUF),502.450248,24.031112,472.944258,21.204993,538.75388,25.554118,506.812914,26.205549,508.625328,23.194982,473.173092,33.708177,517.402824,28.084272,481.704304,28.050498,514.911812,28.389288,474.856506,23.318758,479.99931,33.79139,460.276378,33.368058,489.72002,59.753603,466.309172,41.966605,480.952782,30.394593,465.62063,18.869454,1.344828,0.613879,2.870968,2.012328,85.539429,47.461923,84.072103,49.099701,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,1.0,,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
105,7842.0,Booklet 11,5006.0,500604.0,50060428.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,12.83,Teacher from School,Students Booklet not used for Reliability Scoring,7.2,2.500531,0.152397,1.0,1.0,1.08,1.0,6.0,1.111111,0.0,51.0,At or above 400 but below 475,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 550 but below 625,At or above 550 but below 625,At or above 475 but below 550,At or above 475 but below 550,Time is plausible,Time is plausible,4.0,Public Use File (PUF),495.03463,33.457064,556.57296,43.280393,506.072012,25.567268,485.78447,28.680727,489.898298,38.499197,502.938172,12.732101,497.907556,32.720099,506.802852,35.491279,482.631272,13.738683,544.014278,8.923784,539.063696,15.498231,552.172804,17.027498,578.194614,53.359055,554.793154,6.125543,516.20788,25.587924,524.194984,18.045322,2.107143,1.770944,2.566667,1.165106,95.547607,82.116859,74.530815,52.49718,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,,0.0,0.0,1.0,1.0,1.0,0.0,,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
106,7842.0,Booklet 12,5006.0,500604.0,50060429.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Male,13.83,Teacher from School,Students Booklet was used for Reliability Scoring,7.2,2.500531,0.152397,1.0,1.0,1.08,1.0,6.0,1.111111,0.0,51.0,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 550 but below 625,At or above 475 but below 550,At or above 475 but below 550,Time is plausible,Time is plausible,4.0,Public Use File (PUF),535.18265,10.800149,529.39192,21.778517,561.160262,11.609097,511.021122,14.70907,472.049586,45.410536,479.10647,39.517656,519.2893,15.159974,538.38863,27.306321,547.56654,18.547938,540.175758,19.129631,565.998124,24.667178,526.51337,16.703869,542.312046,37.079072,545.389718,34.197401,555.220342,29.791214,502.53367,22.840053,3.185185,1.210119,5.4,1.773366,94.625385,124.261529,79.827704,49.426861,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,,1.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,
107,7842.0,Booklet 14,5006.0,500610.0,50061001.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Female,13.42,Teacher from School,Students Booklet was used for Reliability Scoring,6.923077,2.404357,0.146536,1.0,1.0,1.0,1.0,6.0,1.153846,1.0,51.0,At or above 550 but below 625,At or above 625,At or above 550 but below 625,At or above 550 but below 625,At or above 550 but below 625,At or above 625,At or above 625,At or above 625,At or above 550 but below 625,At or above 625,Time is plausible,Time is plausible,4.0,Public Use File (PUF),588.568896,35.787811,647.355002,33.783736,620.355738,28.182839,598.339204,14.885793,549.84263,44.286399,612.78761,59.713603,613.662786,22.42435,604.512536,25.399498,604.48285,25.165175,624.148936,42.001531,661.275226,27.785235,664.013478,39.544819,658.822316,35.639328,628.488282,41.228528,602.312114,38.941163,644.927232,47.209135,1.275862,0.840771,3.642857,0.82616,92.524207,60.441678,94.797889,63.089757,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0
108,7842.0,Booklet 12,5006.0,500610.0,50061003.0,,,Pop 2,Upper Grade,Grade 8,English,English (U.A.E. Abu Dhabi),Female,13.42,Teacher from School,Students Booklet was used for Reliability Scoring,6.923077,2.404357,0.146536,1.0,1.0,1.0,1.0,6.0,1.153846,1.0,51.0,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 475 but below 550,At or above 550 but below 625,Time is plausible,Time is plausible,4.0,Public Use File (PUF),532.573114,24.983721,536.277756,31.698775,536.089366,21.047435,513.958462,17.909409,508.043666,39.562703,506.742824,36.885296,531.86281,34.572588,508.420758,11.697418,541.42464,13.499889,549.644446,39.805887,524.82325,41.718942,542.129176,22.203934,498.607794,23.238168,560.62812,38.721169,515.250448,45.984825,524.096248,20.484934,1.333333,0.5547,1.566667,0.773854,95.295231,62.550234,91.680111,62.376394,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,0.0,,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,,1.0,1.0,0.0,0.0,,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,


In [227]:
stud_teach.head()

Unnamed: 0,IDCNTRY,IDBOOK,IDSCHOOL,IDCLASS,IDSTUD,IDTEACH,IDLINK,IDPOP,IDGRADE,IDSUBJ,ITCOURSE,NMTEACH,NSTEACH,VERSION,SCOPE,MMAT_avg,MMAT_sd,SSCI_avg,SSCI_sd,MALG_avg,MALG_sd,MAPP_avg,MAPP_sd,MDAT_avg,MDAT_sd,MGEO_avg,MGEO_sd,MKNO_avg,MKNO_sd,MNUM_avg,MNUM_sd,MREA_avg,MREA_sd,SAPP_avg,SAPP_sd,SBIO_avg,SBIO_sd,SCHE_avg,SCHE_sd,SEAR_avg,SEAR_sd,SKNO_avg,SKNO_sd,SPHY_avg,SPHY_sd,SREA_avg,SREA_sd
0,7842.0,Booklet 11,5001.0,500102.0,50010201.0,500101.0,1.0,Pop 2,Grade 8,Mathematics,Mathematics,1.0,1.0,4.0,Public Use File (PUF),550.550842,14.177866,480.099946,26.598791,517.042408,28.358961,502.83618,27.430262,485.677994,10.297154,508.16895,32.114046,527.618686,23.479411,535.162198,21.271499,517.203196,13.943475,459.227332,24.484878,404.517518,42.645838,443.350576,55.498128,412.148076,46.859601,471.580088,22.08194,450.679538,42.528149,464.316504,18.936576
1,7842.0,Booklet 11,5001.0,500102.0,50010201.0,500102.0,3.0,Pop 2,Grade 8,Science,Integrated science,1.0,1.0,4.0,Public Use File (PUF),550.550842,14.177866,480.099946,26.598791,517.042408,28.358961,502.83618,27.430262,485.677994,10.297154,508.16895,32.114046,527.618686,23.479411,535.162198,21.271499,517.203196,13.943475,459.227332,24.484878,404.517518,42.645838,443.350576,55.498128,412.148076,46.859601,471.580088,22.08194,450.679538,42.528149,464.316504,18.936576
2,7842.0,Booklet 12,5001.0,500102.0,50010202.0,500102.0,3.0,Pop 2,Grade 8,Science,Integrated science,1.0,1.0,4.0,Public Use File (PUF),326.129702,37.533124,318.662552,50.233715,323.233832,56.013071,356.545632,32.103558,302.030198,47.510665,328.756934,60.146536,352.864178,18.870706,355.017696,53.087467,385.102162,33.703481,315.159308,18.44834,295.50934,37.752626,324.683132,31.573676,343.312374,31.913862,323.550294,17.371524,343.94273,45.624659,315.27428,17.844425
3,7842.0,Booklet 12,5001.0,500102.0,50010202.0,500101.0,1.0,Pop 2,Grade 8,Mathematics,Mathematics,1.0,1.0,4.0,Public Use File (PUF),326.129702,37.533124,318.662552,50.233715,323.233832,56.013071,356.545632,32.103558,302.030198,47.510665,328.756934,60.146536,352.864178,18.870706,355.017696,53.087467,385.102162,33.703481,315.159308,18.44834,295.50934,37.752626,324.683132,31.573676,343.312374,31.913862,323.550294,17.371524,343.94273,45.624659,315.27428,17.844425
4,7842.0,Booklet 13,5001.0,500102.0,50010203.0,500101.0,1.0,Pop 2,Grade 8,Mathematics,Mathematics,1.0,1.0,4.0,Public Use File (PUF),393.169444,38.411535,328.771868,35.715264,420.589982,39.893618,389.112222,46.014018,371.349082,21.243562,391.344424,19.908828,417.370708,36.344379,386.164962,11.464428,407.7775,37.342451,310.335522,38.33154,279.297756,25.64207,321.583934,35.663924,291.039122,45.014511,313.976578,41.869908,350.246454,37.317504,315.896516,35.867133


In [78]:
stud_teach.IDGRADER.unique()

array(['Upper Grade'], dtype=object)

In [79]:
stud_teach.IDGRADE.unique()

array(['Grade 8', 'Grade 9'], dtype=object)

In [80]:
stud_teach.IDSUBJ.unique()

array(['Mathematics', 'Science'], dtype=object)

In [81]:
stud_teach.ITCOURSE.unique()

array(['Mathematics', 'Integrated science', 'Physics', 'Biology',
       'Chemistry', 'Earth science', 'Biology/Chemistry',
       'Physics/Chemistry', 'Biology/Earth science'], dtype=object)

In [82]:
stud_teach.ITCOURSE.value_counts()

Mathematics              303798
Integrated science       250575
Biology                   48396
Physics                   48195
Chemistry                 47960
Earth science             33592
Physics/Chemistry         21813
Biology/Earth science     21308
Biology/Chemistry            42
Name: ITCOURSE, dtype: int64

In [83]:
stud_teach.NMTEACH.unique()

array([1., 2., 3., 5., 6., 4., 0.])

In [84]:
stud_teach.NSTEACH.unique()

array([ 1.,  3.,  2., nan,  4.,  5.,  6.,  0.,  7.,  8.])

In [85]:
stud_teach.NTEACH.unique()

array([ 2.,  4.,  3.,  6.,  8.,  9., 10.,  7.,  5.,  1.])

In [101]:
col_set = ['IDGRADER','MATSUBJ','NTEACH','SCISUBJ','MATWGT','SCIWGT','TCHWGT','JKREP','JKZONE','MMAT','SSCI','MALG','MAPP','MDAT','MGEO','MKNO','MNUM','MREA','SAPP','SBIO','SCHE','SEAR','SKNO','SPHY','SREA']
for col in stud_teach.columns:
    if 'IBM' in col:
        col_set.append(col)
print(col_set)
stud_teach.drop(col_set, axis=1, inplace=True)

['IDGRADER', 'MATSUBJ', 'NTEACH', 'SCISUBJ', 'MATWGT', 'SCIWGT', 'TCHWGT', 'JKREP', 'JKZONE', 'BSMIBM01', 'BSMIBM02', 'BSMIBM03', 'BSMIBM04', 'BSMIBM05', 'BSSIBM01', 'BSSIBM02', 'BSSIBM03', 'BSSIBM04', 'BSSIBM05']


In [236]:
stud_teach = stud_teach.loc[:,['IDCNTRY', 'IDBOOK', 'IDSCHOOL', 'IDCLASS', 'IDSTUD', 'IDTEALIN', 'IDTEACH',
       'IDLINK', 'IDPOP', 'IDGRADE', 'IDSUBJ', 'ITCOURSE', 'NMTEACH',
       'NSTEACH']]

KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Index(['IDTEALIN'], dtype='object'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

In [229]:
stud_teach.columns

Index(['IDCNTRY', 'IDBOOK', 'IDSCHOOL', 'IDCLASS', 'IDSTUD', 'IDTEACH',
       'IDLINK', 'IDPOP', 'IDGRADE', 'IDSUBJ', 'ITCOURSE', 'NMTEACH',
       'NSTEACH', 'VERSION', 'SCOPE', 'MMAT_avg', 'MMAT_sd', 'SSCI_avg',
       'SSCI_sd', 'MALG_avg', 'MALG_sd', 'MAPP_avg', 'MAPP_sd', 'MDAT_avg',
       'MDAT_sd', 'MGEO_avg', 'MGEO_sd', 'MKNO_avg', 'MKNO_sd', 'MNUM_avg',
       'MNUM_sd', 'MREA_avg', 'MREA_sd', 'SAPP_avg', 'SAPP_sd', 'SBIO_avg',
       'SBIO_sd', 'SCHE_avg', 'SCHE_sd', 'SEAR_avg', 'SEAR_sd', 'SKNO_avg',
       'SKNO_sd', 'SPHY_avg', 'SPHY_sd', 'SREA_avg', 'SREA_sd'],
      dtype='object')

In [230]:
stud_teach.to_csv('data/stud_teach.csv', index = False)

In [228]:
stud_teach.shape

(775679, 47)

In [94]:
scores = ['MMAT','SSCI','MALG','MAPP','MDAT','MGEO','MKNO','MNUM','MREA','SAPP','SBIO','SCHE','SEAR','SKNO','SPHY','SREA']
for item in scores:
    sub_col = [col for col in stud_ach.columns if item in col]
    print(sub_col)
    stud_ach[item.lower()+'_avg'] = stud_ach.loc[:,sub_col].mean(axis=1)
    stud_ach[item.lower()+'_sd'] = stud_ach.loc[:,sub_col].std(axis=1)
    stud_ach.drop(sub_col, axis = 1, inplace=True)

sub_col = [col for col in stud_ach.columns if ('ME' in col and '_F' in col)]
print(sub_col)
stud_ach['math_freq_avg'] = stud_ach.loc[:,sub_col].mean(axis=1)
stud_ach['math_freq_sd'] = stud_ach.loc[:,sub_col].std(axis=1)
stud_ach.drop(sub_col, axis = 1, inplace=True)

sub_col = [col for col in stud_ach.columns if ('SE' in col and '_F' in col)]
print(sub_col)
stud_ach['sci_freq_avg'] = stud_ach.loc[:,sub_col].mean(axis=1)
stud_ach['sci_freq_sd'] = stud_ach.loc[:,sub_col].std(axis=1)
stud_ach.drop(sub_col, axis = 1, inplace=True)

sub_col = [col for col in stud_ach.columns if ('ME' in col and '_S' in col)]
print(sub_col)
stud_ach['math_time_avg'] = stud_ach.loc[:,sub_col].mean(axis=1)
stud_ach['math_time_sd'] = stud_ach.loc[:,sub_col].std(axis=1)
stud_ach.drop(sub_col, axis = 1, inplace=True)

sub_col = [col for col in stud_ach.columns if ('SE' in col and '_S' in col)]
print(sub_col)
stud_ach['sci_time_avg'] = stud_ach.loc[:,sub_col].mean(axis=1)
stud_ach['sci_time_sd'] = stud_ach.loc[:,sub_col].std(axis=1)
stud_ach.drop(sub_col, axis = 1, inplace=True)

['BSMMAT01', 'BSMMAT02', 'BSMMAT03', 'BSMMAT04', 'BSMMAT05']
['BSSSCI01', 'BSSSCI02', 'BSSSCI03', 'BSSSCI04', 'BSSSCI05']
['BSMALG01', 'BSMALG02', 'BSMALG03', 'BSMALG04', 'BSMALG05']
['BSMAPP01', 'BSMAPP02', 'BSMAPP03', 'BSMAPP04', 'BSMAPP05']
['BSMDAT01', 'BSMDAT02', 'BSMDAT03', 'BSMDAT04', 'BSMDAT05']
['BSMGEO01', 'BSMGEO02', 'BSMGEO03', 'BSMGEO04', 'BSMGEO05']
['BSMKNO01', 'BSMKNO02', 'BSMKNO03', 'BSMKNO04', 'BSMKNO05']
['BSMNUM01', 'BSMNUM02', 'BSMNUM03', 'BSMNUM04', 'BSMNUM05']
['BSMREA01', 'BSMREA02', 'BSMREA03', 'BSMREA04', 'BSMREA05']
['BSSAPP01', 'BSSAPP02', 'BSSAPP03', 'BSSAPP04', 'BSSAPP05']
['BSSBIO01', 'BSSBIO02', 'BSSBIO03', 'BSSBIO04', 'BSSBIO05']
['BSSCHE01', 'BSSCHE02', 'BSSCHE03', 'BSSCHE04', 'BSSCHE05']
['BSSEAR01', 'BSSEAR02', 'BSSEAR03', 'BSSEAR04', 'BSSEAR05']
['BSSKNO01', 'BSSKNO02', 'BSSKNO03', 'BSSKNO04', 'BSSKNO05']
['BSSPHY01', 'BSSPHY02', 'BSSPHY03', 'BSSPHY04', 'BSSPHY05']
['BSSREA01', 'BSSREA02', 'BSSREA03', 'BSSREA04', 'BSSREA05']
['ME52024_F', 'ME52058_F

In [253]:
stud_ach.shape

(295453, 478)

In [105]:
stud_ach.to_csv('data/stud_ach.csv', index = False)
stud_teach.to_csv('data/stud_teach.csv', index = False)

In [256]:
student.shape[0]-student.BSDGICC.isnull().sum()

65556

In [257]:
student.shape

(295453, 455)

In [262]:
student.loc[:,['BSBGHER','BSDGHER','BSBGSB','BSDGSB','BSBGSEC','BSDGSEC']]

Unnamed: 0,BSBGHER,BSDGHER,BSBGSB,BSDGSB,BSBGSEC,BSDGSEC
0,10.84499,Some Resources,12.93734,Never or Almost Never,7.68313,Medium Self-Efficacy
1,10.23835,Some Resources,12.93734,Never or Almost Never,8.52147,Medium Self-Efficacy
2,9.03613,Some Resources,12.93734,Never or Almost Never,9.52551,High Self-Efficacy
3,10.23835,Some Resources,12.93734,Never or Almost Never,7.25444,Medium Self-Efficacy
4,10.23835,Some Resources,12.85595,Never or Almost Never,7.28369,Medium Self-Efficacy
...,...,...,...,...,...,...
295448,12.25657,Many Resources,10.41803,Never or Almost Never,,
295449,10.23835,Some Resources,9.56975,Never or Almost Never,,
295450,7.34568,Few Resources,9.92809,Never or Almost Never,,
295451,11.48712,Some Resources,12.93734,Never or Almost Never,,


In [265]:
list(dfs['BSGM7'].index)

['IDCNTRY',
 'IDBOOK',
 'IDSCHOOL',
 'IDCLASS',
 'IDSTUD',
 'BSBG01',
 'BSBG03',
 'BSBG04',
 'BSBG05A',
 'BSBG05B',
 'BSBG05C',
 'BSBG05D',
 'BSBG05E',
 'BSBG05F',
 'BSBG05G',
 'BSBG05H',
 'BSBG05I',
 'BSBG06A',
 'BSBG06B',
 'BSBG07',
 'BSBG08A',
 'BSBG08B',
 'BSBG09A',
 'BSBG09B',
 'BSBG10',
 'BSBG11A',
 'BSBG11B',
 'BSBG12A',
 'BSBG12B',
 'BSBG12C',
 'BSBG12D',
 'BSBG12E',
 'BSBG12F',
 'BSBG13A',
 'BSBG13B',
 'BSBG13C',
 'BSBG13D',
 'BSBG13E',
 'BSBG14A',
 'BSBG14B',
 'BSBG14C',
 'BSBG14D',
 'BSBG14E',
 'BSBG14F',
 'BSBG14G',
 'BSBG14H',
 'BSBG14I',
 'BSBG14J',
 'BSBG14K',
 'BSBG14L',
 'BSBG14M',
 'BSBG14N',
 'BSBM15',
 'BSBM16A',
 'BSBM16B',
 'BSBM16C',
 'BSBM16D',
 'BSBM16E',
 'BSBM16F',
 'BSBM16G',
 'BSBM16H',
 'BSBM16I',
 'BSBM17A',
 'BSBM17B',
 'BSBM17C',
 'BSBM17D',
 'BSBM17E',
 'BSBM17F',
 'BSBM17G',
 'BSBM18A',
 'BSBM18B',
 'BSBM18C',
 'BSBM18D',
 'BSBM18E',
 'BSBM18F',
 'BSBM19A',
 'BSBM19B',
 'BSBM19C',
 'BSBM19D',
 'BSBM19E',
 'BSBM19F',
 'BSBM19G',
 'BSBM19H',
 'BSBM19I',

In [266]:
student_cols = ['IDCNTRY','IDBOOK','IDSCHOOL','IDCLASS','IDSTUD','BSBG01','BSBG03','BSBG04','BSBG05A','BSBG05B','BSBG05C','BSBG05D','BSBG05E','BSBG05F','BSBG05G','BSBG05H','BSBG05I','BSBG06A','BSBG06B','BSBG07','BSBG08A','BSBG08B','BSBG09A','BSBG09B','BSBG10','BSBG11A','BSBG11B','BSBG12A','BSBG12B','BSBG12C','BSBG12D','BSBG12E','BSBG12F','IDPOP','IDGRADE','ITLANG_SQ','LCID_SQ','ITLANG_SA','LCID_SA','ITSEX','BSDAGE','BSBGHER','BSDGHER','BSBGSSB','BSDGSSB','BSBGSB','BSDGSB','BSBGSLM','BSDGSLM','BSBGICM','BSDGICM','BSBGDML','BSDGDML','BSBGSCM','BSDGSCM','BSBGSVM','BSDGSVM','BSBGSLS','BSDGSLS','BSBGICS','BSDGICS','BSBGSCS','BSDGSCS','BSBGSVS','BSDGSVS','BSBGSLB','BSDGSLB','BSBGICB','BSDGICB','BSBGSCB','BSDGSCB','BSBGSLE','BSDGSLE','BSBGICE','BSDGICE','BSBGSCE','BSDGSCE','BSBGSLC','BSDGSLC','BSBGICC','BSDGICC','BSBGSCC','BSDGSCC','BSBGSLP','BSDGSLP','BSBGICP','BSDGICP','BSBGSCP','BSDGSCP','BSBGSEC','BSDGSEC', 'BSDG05S','BSDGEDUP']
student = student.loc[:,student_cols]

In [267]:
student.shape

(295453, 93)

In [268]:
student.to_csv('data/student.csv', index=False)

In [269]:
student.dtypes

IDCNTRY      float64
IDBOOK        object
IDSCHOOL     float64
IDCLASS      float64
IDSTUD       float64
BSBG01        object
BSBG03        object
BSBG04        object
BSBG05A       object
BSBG05B       object
BSBG05C       object
BSBG05D       object
BSBG05E       object
BSBG05F       object
BSBG05G       object
BSBG05H       object
BSBG05I       object
BSBG06A       object
BSBG06B       object
BSBG07        object
BSBG08A       object
BSBG08B       object
BSBG09A       object
BSBG09B       object
BSBG10        object
BSBG11A       object
BSBG11B       object
BSBG12A       object
BSBG12B       object
BSBG12C       object
BSBG12D       object
BSBG12E       object
BSBG12F       object
IDPOP         object
IDGRADE       object
ITLANG_SQ     object
LCID_SQ       object
ITLANG_SA     object
LCID_SA       object
ITSEX         object
BSDAGE       float64
BSBGHER      float64
BSDGHER       object
BSBGSSB      float64
BSDGSSB       object
BSBGSB       float64
BSDGSB        object
BSBGSLM      

In [272]:
count_avg_stud_col = ['BSBGHER','BSBGSSB','BSBGSB','BSBGSLM','BSBGICM','BSBGDML','BSBGSCM','BSBGSVM','BSBGSLS','BSBGICS','BSBGSCS','BSBGSVS','BSBGSLB','BSBGICB','BSBGSCB','BSBGSLE','BSBGICE','BSBGSCE','BSBGSLC','BSBGICC','BSBGSCC','BSBGSLP','BSBGICP','BSBGSCP','BSBGSEC']
student.groupby('IDCNTRY').mean().loc[:,count_avg_stud_col]

Unnamed: 0_level_0,BSBGHER,BSBGSSB,BSBGSB,BSBGSLM,BSBGICM,BSBGDML,BSBGSCM,BSBGSVM,BSBGSLS,BSBGICS,BSBGSCS,BSBGSVS,BSBGSLB,BSBGICB,BSBGSCB,BSBGSLE,BSBGICE,BSBGSCE,BSBGSLC,BSBGICC,BSBGSCC,BSBGSLP,BSBGICP,BSBGSCP,BSBGSEC
IDCNTRY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
36.0,11.213347,9.525933,9.53774,9.484285,9.82098,9.398237,10.059295,9.757369,9.657627,9.744725,9.761588,9.668799,,,,,,,,,,,,,
48.0,10.2448,9.811352,9.516008,9.885598,10.122393,9.649973,10.397799,9.716578,10.447122,10.38852,10.716086,10.525074,,,,,,,,,,,,,
152.0,10.034126,9.6673,9.867485,9.582758,9.555929,9.201118,9.72495,9.668255,9.622368,9.418506,9.564555,9.547269,,,,,,,,,,,,,9.775152
158.0,10.382371,9.24733,10.992074,9.238279,9.792956,10.520082,9.175286,8.249361,9.28373,9.636462,8.850432,8.841529,,,,,,,,,,,,,8.518955
196.0,10.920616,9.016033,10.019355,9.625078,9.838273,9.601703,10.155569,9.586986,,,,9.736598,9.816291,9.731253,9.954975,9.16548,9.516894,10.14932,10.582896,10.329169,10.888379,10.103568,10.153041,10.121316,
246.0,11.155315,10.148428,10.815054,9.109938,9.853814,10.067458,9.955591,9.026701,,,,9.139229,9.154888,9.736672,9.768053,9.525591,9.828655,9.875557,9.366019,9.715526,9.641212,9.212325,9.695865,9.622839,9.993364
250.0,10.657268,9.337969,10.044469,9.463502,9.457429,10.149034,9.814206,9.261788,,,,9.202494,9.485906,9.198179,9.361111,9.752875,9.322807,9.402387,9.309069,8.976155,9.460197,9.474598,9.038521,9.604246,10.447073
268.0,10.912833,10.083011,11.145314,10.16568,10.646432,10.167295,10.157617,10.068739,,,,10.223703,10.465856,10.877833,10.274719,10.254659,10.604185,10.031458,10.341517,10.625024,10.199093,10.236771,10.604021,9.927712,10.395565
344.0,10.254207,9.102064,9.92616,9.405436,9.227997,9.973955,9.39881,8.685595,9.635687,9.449785,9.454729,9.456589,,,,,,,,,,,,,8.546672
348.0,11.122011,9.518511,10.229443,9.2006,9.591924,10.251706,10.09811,9.138363,,,,9.273862,9.464638,9.68395,10.199971,9.176818,9.580016,10.070679,9.062943,9.289606,9.494266,9.365191,9.495356,9.995927,10.358788
