In [91]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots

# import warnings
# warnings.filterwarnings('ignore')

***All Common***

In [92]:
def find_values_diff(col1, col2):
    """Return the values that occur in only one of the two inputs.

    The result is a single array: first the sorted unique values found in
    ``col1`` but not in ``col2``, then the sorted unique values found in
    ``col2`` but not in ``col1``.
    """
    # Compute the one-sided difference in both directions and glue them together.
    one_sided = [
        np.setdiff1d(left, right)
        for left, right in ((col1, col2), (col2, col1))
    ]
    return np.concatenate(one_sided)

#### Начнем исследовать данные

Посмотрю на каждую таблицу и кратенько отмечу интересные моменты

In [93]:
# Load every source table from CSV files in the current working directory.
assessments = pd.read_csv('assessments.csv') # Information about the assessments in the modules
resultAssessments = pd.read_csv('studentAssessment.csv') # The assessment results themselves
courses = pd.read_csv('courses.csv') # List of modules
studentInfo = pd.read_csv('studentInfo.csv') # General information about the student
registration = pd.read_csv('studentRegistration.csv') # Student registrations for modules
studentVle = pd.read_csv('studentVle.csv') # Student interactions with the virtual learning environment
vle = pd.read_csv('vle.csv') # Materials available in the virtual learning environment

##### Сначала глянем assessments и resultAssessments

In [94]:
# assessments: structure overview followed by a random peek at 7 rows.
# DataFrame.info() prints its report itself and returns None, so it is called
# bare; wrapping it in print() only appends a stray "None" line to the output.
assessments.info()
assessments.sample(7)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 206 entries, 0 to 205
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   code_module        206 non-null    object 
 1   code_presentation  206 non-null    object 
 2   id_assessment      206 non-null    int64  
 3   assessment_type    206 non-null    object 
 4   date               195 non-null    float64
 5   weight             206 non-null    float64
dtypes: float64(2), int64(1), object(3)
memory usage: 9.8+ KB
None


Unnamed: 0,code_module,code_presentation,id_assessment,assessment_type,date,weight
136,FFF,2013B,34872,Exam,222.0,100.0
111,EEE,2013J,30711,TMA,124.0,28.0
181,GGG,2013J,37423,CMA,229.0,0.0
164,FFF,2014J,34905,CMA,241.0,0.0
204,GGG,2014J,37437,TMA,173.0,0.0
151,FFF,2014B,34892,CMA,227.0,0.0
82,DDD,2013B,25335,TMA,53.0,10.0


In [200]:
# resultAssessments: structure overview followed by a random peek at 7 rows.
# DataFrame.info() prints its report itself and returns None, so it is called
# bare; wrapping it in print() only appends a stray "None" line to the output.
resultAssessments.info()  # No missing values - that is good
resultAssessments.sample(7)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173912 entries, 0 to 173911
Data columns (total 4 columns):
 #   Column          Non-Null Count   Dtype
---  ------          --------------   -----
 0   id_assessment   173912 non-null  int64
 1   id_student      173912 non-null  int64
 2   date_submitted  173912 non-null  int64
 3   is_banked       173912 non-null  int64
dtypes: int64(4)
memory usage: 5.3 MB
None


Unnamed: 0,id_assessment,id_student,date_submitted,is_banked
161821,37419,574948,131,0
62330,24297,633504,147,0
160580,37417,586598,172,0
112499,34868,2001665,178,0
97370,30710,599478,84,0
18503,14998,230081,79,0
14972,14996,535833,18,0


In [102]:
# Assessment ids that appear in only one of the two tables (results vs. metadata).
absent_ids = find_values_diff(resultAssessments["id_assessment"], assessments["id_assessment"])
# Metadata rows for those ids.
assesments_absent = assessments[assessments["id_assessment"].isin(absent_ids)]
assesments_absent # The exams are the ones missing

Unnamed: 0,code_module,code_presentation,id_assessment,assessment_type,date,weight
5,AAA,2013J,1757,Exam,,100.0
11,AAA,2014J,1763,Exam,,100.0
23,BBB,2013B,14990,Exam,,100.0
35,BBB,2013J,15002,Exam,,100.0
47,BBB,2014B,15014,Exam,,100.0
53,BBB,2014J,15025,Exam,,100.0
63,CCC,2014B,40087,Exam,,100.0
73,CCC,2014J,40088,Exam,,100.0
113,EEE,2013J,30713,Exam,235.0,100.0
118,EEE,2014B,30718,Exam,228.0,100.0


In [97]:
# Non-null counts of every column, per assessment type.
group = assessments.groupby("assessment_type").count()
# Keep only the "Exam" row.
# Author's note: yet for some 6 exams the results do exist. Oh well.
# NOTE(review): the "6" cannot be confirmed from this table alone - verify.
group.loc[group.index == "Exam"]

Unnamed: 0_level_0,code_module,code_presentation,id_assessment,date,weight
assessment_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Exam,24,24,24,13,24


In [98]:
# Merge the two tables: attach the assessment metadata to every result row.
assesmentsFullInfo = pd.merge(
    resultAssessments,
    assessments,
    how="inner",
    on="id_assessment",
)
# `assert` is a statement, not a function: it is written without call-style
# parentheses and with a message so that a failure explains itself.
# Only the join key is shared, so it must be counted once.
assert (
    len(assesmentsFullInfo.columns)
    == len(assessments.columns) + len(resultAssessments.columns) - 1
), "unexpected column count after merging results with assessments"
# An inner join must neither drop nor duplicate result rows.
assert (
    assesmentsFullInfo.shape[0] == resultAssessments.shape[0]
), "merge with assessments changed the number of result rows"
# info() prints its own report and returns None; print() would add a stray "None".
assesmentsFullInfo.info()
assesmentsFullInfo.sample(7)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173912 entries, 0 to 173911
Data columns (total 9 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   id_assessment      173912 non-null  int64  
 1   id_student         173912 non-null  int64  
 2   date_submitted     173912 non-null  int64  
 3   is_banked          173912 non-null  int64  
 4   code_module        173912 non-null  object 
 5   code_presentation  173912 non-null  object 
 6   assessment_type    173912 non-null  object 
 7   date               171047 non-null  float64
 8   weight             173912 non-null  float64
dtypes: float64(2), int64(4), object(3)
memory usage: 11.9+ MB
None


Unnamed: 0,id_assessment,id_student,date_submitted,is_banked,code_module,code_presentation,assessment_type,date,weight
45618,15024,686822,200,0,BBB,2014J,TMA,201.0,35.0
168061,37430,630787,147,0,GGG,2014B,CMA,222.0,0.0
94745,25367,676373,195,0,DDD,2014J,TMA,195.0,25.0
66426,25335,371184,53,0,DDD,2013B,TMA,53.0,10.0
167194,37429,593965,122,0,GGG,2014B,CMA,222.0,0.0
43533,15022,2248243,98,0,BBB,2014J,TMA,110.0,20.0
85415,25357,627581,74,0,DDD,2014B,TMA,74.0,17.5


In [100]:
# Drop this feature: its distribution is too skewed to be useful.
print(assesmentsFullInfo["is_banked"].value_counts())
# DataFrames are versioned (V1, V2, ...) instead of mutated, for full reproducibility.
assesmentsFullInfoV1 = assesmentsFullInfo.drop(columns=["is_banked"])

is_banked
0    172003
1      1909
Name: count, dtype: int64


### Объединим с courses

In [125]:
# Module-presentation keys that occur in only one of the two tables.
# The result is stored and printed: as a bare mid-cell expression it was
# neither displayed nor kept, so the check left no trace on a re-run.
courses_diff = find_values_diff(
    col1=courses["code_module"] + courses["code_presentation"],
    col2=assesmentsFullInfoV1["code_module"] + assesmentsFullInfoV1["code_presentation"],
)
print("Module presentations present on one side only:", courses_diff)
# Author's conclusion from this check: OK, it is safe to merge.

assesmentsFullCourses = pd.merge(
    assesmentsFullInfoV1,
    courses,
    how="inner",
    on=["code_module", "code_presentation"],
)
# `assert` is a statement: no call-style parentheses, and a message on failure.
# Exactly one column is expected to be added by `courses`.
assert (
    len(assesmentsFullCourses.columns) == len(assesmentsFullInfoV1.columns) + 1
), "unexpected column count after merging with courses"
# An inner join must neither drop nor duplicate assessment rows.
assert (
    assesmentsFullCourses.shape[0] == assesmentsFullInfoV1.shape[0]
), "merge with courses changed the number of rows"
assesmentsFullCourses.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173912 entries, 0 to 173911
Data columns (total 9 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   id_assessment               173912 non-null  int64  
 1   id_student                  173912 non-null  int64  
 2   date_submitted              173912 non-null  int64  
 3   code_module                 173912 non-null  object 
 4   code_presentation           173912 non-null  object 
 5   assessment_type             173912 non-null  object 
 6   date                        171047 non-null  float64
 7   weight                      173912 non-null  float64
 8   module_presentation_length  173912 non-null  int64  
dtypes: float64(2), int64(4), object(3)
memory usage: 11.9+ MB


#### Проделаем похожее с таблицами о студентах

In [130]:
# registration: structure overview followed by a random peek at 7 rows.
registration.info()
registration.sample(7) # Far fewer registrations than grades; that is fine, since each module can have several grades

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32593 entries, 0 to 32592
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   code_module        32593 non-null  object 
 1   code_presentation  32593 non-null  object 
 2   id_student         32593 non-null  int64  
 3   date_registration  32548 non-null  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 1018.7+ KB


Unnamed: 0,code_module,code_presentation,id_student,date_registration
6790,BBB,2014J,551237,-23.0
22588,FFF,2013B,382086,-27.0
32051,GGG,2014J,638503,-58.0
26406,FFF,2014B,407500,-25.0
31683,GGG,2014B,631788,-8.0
23595,FFF,2013B,556489,-18.0
26318,FFF,2014B,301128,-305.0


In [129]:
# Composite (module, presentation, student) keys on each side of the join.
assessment_keys = (
    assesmentsFullCourses["code_module"]
    + assesmentsFullCourses["code_presentation"]
    + assesmentsFullCourses["id_student"].astype(str)
)
registration_keys = (
    registration["code_module"]
    + registration["code_presentation"]
    + registration["id_student"].astype(str)
)
reg_diff = find_values_diff(col1=assessment_keys, col2=registration_keys)

# The symmetric difference alone does not say which table the unmatched keys
# come from, and the two directions mean different things for the join, so
# both one-sided counts are reported explicitly.
print(
    "Submission keys without a registration:",
    len(np.setdiff1d(assessment_keys, registration_keys)),
)
print(
    "Registration keys without any submission:",
    len(np.setdiff1d(registration_keys, assessment_keys)),
)

# Author's note: this is probably fine; the tables are merged with an inner
# join, on the reasoning that a student who did not register did not submit.
# NOTE(review): if the first count above is 0, the mismatch is entirely
# registrations without submissions, and the inner join loses no submissions.
len(reg_diff)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32593 entries, 0 to 32592
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   code_module        32593 non-null  object 
 1   code_presentation  32593 non-null  object 
 2   id_student         32593 non-null  int64  
 3   date_registration  32548 non-null  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 1018.7+ KB


6750

In [146]:
# Attach the registration data to every assessment row.
assesmentsFullCoursesReg = pd.merge(
    registration,
    assesmentsFullCourses,
    how="inner",
    on=["code_module", "code_presentation", "id_student"],
)
# Same invariants as the earlier merges in this notebook.
# An inner join must neither drop nor duplicate assessment rows.
assert (
    assesmentsFullCoursesReg.shape[0] == assesmentsFullCourses.shape[0]
), "merge with registration changed the number of assessment rows"
# The three join keys are shared between the tables, so they are counted once.
assert (
    len(assesmentsFullCoursesReg.columns)
    == len(registration.columns) + len(assesmentsFullCourses.columns) - 3
), "unexpected column count after merging with registration"
assesmentsFullCoursesReg.info()
assesmentsFullCoursesReg.sample(7)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173912 entries, 0 to 173911
Data columns (total 10 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   code_module                 173912 non-null  object 
 1   code_presentation           173912 non-null  object 
 2   id_student                  173912 non-null  int64  
 3   date_registration           173905 non-null  float64
 4   id_assessment               173912 non-null  int64  
 5   date_submitted              173912 non-null  int64  
 6   assessment_type             173912 non-null  object 
 7   date                        171047 non-null  float64
 8   weight                      173912 non-null  float64
 9   module_presentation_length  173912 non-null  int64  
dtypes: float64(3), int64(4), object(3)
memory usage: 13.3+ MB


Unnamed: 0,code_module,code_presentation,id_student,date_registration,id_assessment,date_submitted,assessment_type,date,weight,module_presentation_length
107299,FFF,2013B,467725,-16.0,34866,80,CMA,222.0,0.0,240
74495,DDD,2013B,2227786,-22.0,25345,152,CMA,149.0,4.0,240
28524,BBB,2013J,2550290,-58.0,14997,51,TMA,47.0,18.0,268
69303,DDD,2013B,500136,-151.0,25343,81,CMA,79.0,3.0,240
73538,DDD,2013B,559612,-16.0,25342,54,CMA,51.0,3.0,240
109377,FFF,2013B,528075,-21.0,34869,171,CMA,222.0,0.0,240
649,AAA,2013J,324084,-165.0,1755,166,TMA,166.0,20.0,268


##### Объединим с инфой о студенте

In [153]:
# studentInfo: structure overview followed by a random peek at 10 rows.
studentInfo.info()
studentInfo.sample(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32593 entries, 0 to 32592
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   code_module           32593 non-null  object
 1   code_presentation     32593 non-null  object
 2   id_student            32593 non-null  int64 
 3   gender                32593 non-null  object
 4   region                32593 non-null  object
 5   highest_education     32593 non-null  object
 6   imd_band              31482 non-null  object
 7   age_band              32593 non-null  object
 8   num_of_prev_attempts  32593 non-null  int64 
 9   studied_credits       32593 non-null  int64 
 10  disability            32593 non-null  object
dtypes: int64(3), object(8)
memory usage: 2.7+ MB


Unnamed: 0,code_module,code_presentation,id_student,gender,region,highest_education,imd_band,age_band,num_of_prev_attempts,studied_credits,disability
5818,BBB,2014B,624738,M,Yorkshire Region,Lower Than A Level,10-20,0-35,0,60,N
23299,FFF,2013B,543536,M,Ireland,Lower Than A Level,50-60%,0-35,0,150,N
15414,DDD,2013J,572648,M,Ireland,A Level or Equivalent,10-20,0-35,0,60,N
25872,FFF,2013J,608169,M,Wales,HE Qualification,40-50%,35-55,0,90,N
31184,GGG,2014B,578845,F,North Region,A Level or Equivalent,10-20,0-35,0,30,N
27568,FFF,2014B,1738364,M,South Region,HE Qualification,90-100%,35-55,0,60,N
2251,BBB,2013B,558602,F,East Anglian Region,Lower Than A Level,30-40%,0-35,0,60,N
16843,DDD,2014B,587403,M,Wales,A Level or Equivalent,80-90%,0-35,1,210,Y
27222,FFF,2014B,626692,M,East Anglian Region,Lower Than A Level,50-60%,0-35,0,60,N
21701,EEE,2014J,649843,M,London Region,A Level or Equivalent,10-20,0-35,0,60,N


In [154]:
# Composite (module, presentation, student) keys that occur in only one of the
# two tables: the merged assessments table vs. studentInfo.
students_diff = find_values_diff(
   col1=assesmentsFullCoursesReg["code_module"] + assesmentsFullCoursesReg["code_presentation"] + assesmentsFullCoursesReg["id_student"].astype(str),
   col2=studentInfo["code_module"] + studentInfo["code_presentation"] + studentInfo["id_student"].astype(str)
)
len(students_diff) # Same picture as with the registrations - that makes sense

6750

In [158]:
# Attach the student profile to every assessment row.
assesmentsCoursesRegStudentInfo = pd.merge(
    assesmentsFullCoursesReg,
    studentInfo,
    how="inner",
    on=["code_module", "code_presentation", "id_student"],
)
# The shape is checked with assertions rather than by eye, consistent with the
# earlier merges in this notebook.
# An inner join must neither drop nor duplicate assessment rows.
assert (
    assesmentsCoursesRegStudentInfo.shape[0] == assesmentsFullCoursesReg.shape[0]
), "merge with studentInfo changed the number of assessment rows"
# The three join keys are shared between the tables, so they are counted once.
assert (
    len(assesmentsCoursesRegStudentInfo.columns)
    == len(assesmentsFullCoursesReg.columns) + len(studentInfo.columns) - 3
), "unexpected column count after merging with studentInfo"
assesmentsCoursesRegStudentInfo.info()
assesmentsCoursesRegStudentInfo.sample(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173912 entries, 0 to 173911
Data columns (total 18 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   code_module                 173912 non-null  object 
 1   code_presentation           173912 non-null  object 
 2   id_student                  173912 non-null  int64  
 3   date_registration           173905 non-null  float64
 4   id_assessment               173912 non-null  int64  
 5   date_submitted              173912 non-null  int64  
 6   assessment_type             173912 non-null  object 
 7   date                        171047 non-null  float64
 8   weight                      173912 non-null  float64
 9   module_presentation_length  173912 non-null  int64  
 10  gender                      173912 non-null  object 
 11  region                      173912 non-null  object 
 12  highest_education           173912 non-null  object 
 13  imd_band      

Unnamed: 0,code_module,code_presentation,id_student,date_registration,id_assessment,date_submitted,assessment_type,date,weight,module_presentation_length,gender,region,highest_education,imd_band,age_band,num_of_prev_attempts,studied_credits,disability
164957,GGG,2014B,365084,-8.0,37426,117,TMA,117.0,0.0,241,F,South Region,A Level or Equivalent,30-40%,35-55,0,30,N
53383,CCC,2014B,2486769,-154.0,24287,69,CMA,67.0,7.0,241,M,London Region,HE Qualification,0-10%,35-55,0,90,N
121489,FFF,2013J,559979,-149.0,34882,182,CMA,236.0,0.0,268,F,East Anglian Region,HE Qualification,20-30%,0-35,0,90,N
140177,FFF,2014B,630655,-28.0,34889,129,TMA,129.0,25.0,241,M,Scotland,A Level or Equivalent,30-40%,0-35,0,60,N
26265,BBB,2013J,606572,-30.0,14999,131,TMA,131.0,18.0,268,F,West Midlands Region,A Level or Equivalent,0-10%,35-55,0,120,N
8597,BBB,2013B,534983,-77.0,14985,48,TMA,47.0,18.0,240,F,West Midlands Region,A Level or Equivalent,50-60%,0-35,0,120,N
66987,DDD,2013B,361209,-30.0,25336,79,TMA,81.0,12.5,240,M,Yorkshire Region,A Level or Equivalent,0-10%,0-35,1,180,N
33687,BBB,2014B,613299,-144.0,15009,39,TMA,40.0,18.0,234,F,West Midlands Region,Lower Than A Level,20-30%,0-35,0,60,N
108797,FFF,2013B,519472,-36.0,34867,164,CMA,222.0,0.0,240,M,West Midlands Region,A Level or Equivalent,30-40%,0-35,0,60,N
104920,FFF,2013B,244114,-52.0,34860,18,TMA,19.0,12.5,240,M,West Midlands Region,Lower Than A Level,60-70%,0-35,0,60,N


### Инфо о взаимодействии с материалами попробуем сгруппировать

In [170]:
# Share of non-null values in the two week columns. It is printed explicitly:
# as a bare mid-cell expression the check was never displayed.
print(vle[["week_from", "week_to"]].notna().mean())
# Author's decision: these features are not considered, since less than 20% of
# their values are filled in.
vleV1 = vle.drop(columns=["week_from", "week_to"])
vleV1.sample(10)

Unnamed: 0,id_site,code_module,code_presentation,activity_type
2163,547305,DDD,2013B,externalquiz
3949,832669,EEE,2014J,oucontent
4895,716981,FFF,2013J,oucollaborate
1536,913678,BBB,2014J,resource
6145,798674,GGG,2014B,oucontent
3172,773100,DDD,2014B,subpage
4075,527311,FFF,2013B,url
1517,913481,BBB,2014J,oucontent
1514,913613,BBB,2014J,resource
5260,779427,FFF,2014B,oucontent


In [171]:
# Frequency of each activity type among the VLE materials.
vleV1["activity_type"].value_counts() # A questionable table, admittedly, but fine - keep it

activity_type
resource          2660
subpage           1055
oucontent          996
url                886
forumng            194
quiz               127
page               102
oucollaborate       82
questionnaire       61
ouwiki              49
dataplus            28
externalquiz        26
homepage            22
ouelluminate        21
glossary            21
dualpane            20
repeatactivity       5
htmlactivity         4
sharedsubpage        3
folder               2
Name: count, dtype: int64

In [169]:
# studentVle: structure overview followed by a random peek at 7 rows.
studentVle.info()
studentVle.sample(7)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10655280 entries, 0 to 10655279
Data columns (total 6 columns):
 #   Column             Dtype 
---  ------             ----- 
 0   code_module        object
 1   code_presentation  object
 2   id_student         int64 
 3   id_site            int64 
 4   date               int64 
 5   sum_click          int64 
dtypes: int64(4), object(2)
memory usage: 487.8+ MB


Unnamed: 0,code_module,code_presentation,id_student,id_site,date,sum_click
10345377,GGG,2013J,569679,661599,146,3
1287804,BBB,2014B,477779,768521,37,1
8454550,FFF,2014B,633403,779556,6,1
5925006,EEE,2014J,684907,832660,19,2
4980472,DDD,2014J,2423655,813954,62,1
9336295,FFF,2014J,1818385,882553,23,1
5882621,EEE,2014J,574565,832668,4,1


In [175]:
# id_site values that occur in only one of the two tables (studentVle vs. vleV1).
find_values_diff(studentVle["id_site"], vleV1["id_site"]) # There is a relatively small number of mismatches

array([ 526733,  526818,  527224,  527386,  527399,  527404,  527493,
        527506,  546872,  546897,  547278,  547638,  547639,  547646,
        547648,  547663,  547668,  547682,  547684,  547689,  547692,
        547693,  547718,  547733,  547742,  547771,  547782,  547800,
        547941,  547969,  551014,  551036,  551124,  551159,  551169,
        673527,  673529,  673535,  674130,  674323,  674324,  703977,
        716221,  716675,  716987,  729799,  729803,  729805,  729808,
        729824,  729831,  729835,  729836,  729837,  729838,  729839,
        729840,  729845,  730034,  779622,  790925,  813957,  832764,
        877133,  883074,  909241,  913688,  923834,  923835,  924208,
        924209,  924210,  924211,  924222,  924225,  924383,  924387,
        924388,  936658,  936659,  936763, 1027118, 1032910, 1071060,
       1072098, 1072099, 1072100, 1072236, 1072237, 1072239, 1072240,
       1072252, 1072253, 1073191, 1076553, 1077905])

In [179]:
# Attach the material's activity type to every student interaction.
studentInteractions = pd.merge(
    studentVle,
    vleV1,
    how="inner",
    on=["id_site", "code_module", "code_presentation"],
)
# `assert` is a statement: no call-style parentheses, and a message on failure.
# Only activity_type is expected to be added.
assert (
    studentInteractions.shape[1] == studentVle.shape[1] + 1
), "merge with vle was expected to add only activity_type"
# Some id_site values exist in only one of the two tables, and an inner join
# silently drops interactions whose site is unknown, so the loss is reported
# (0 means every interaction was kept).
print(
    "Interaction rows lost in the inner join with vle:",
    studentVle.shape[0] - studentInteractions.shape[0],
)

## Преобразуем данные

In [182]:
studentInteractions.sample(10) # The plan is to take the naive route and count the total number of interactions with the module

Unnamed: 0,code_module,code_presentation,id_student,id_site,date,sum_click,activity_type
7198028,FFF,2013B,193834,527420,215,1,resource
463305,BBB,2013B,555695,543091,-8,1,resource
8945819,FFF,2014B,544593,779429,94,1,oucontent
2177058,CCC,2014B,282193,729678,31,1,forumng
9204806,FFF,2014J,652064,882670,11,4,oucontent
9607693,FFF,2014J,412353,882985,8,2,resource
4433718,DDD,2014B,629822,773182,2,2,subpage
5768215,EEE,2014B,633544,790856,108,1,oucontent
3114947,CCC,2014J,347809,1010405,208,2,resource
2889862,CCC,2014J,585769,909020,99,1,forumng


In [183]:
# Random peek at 10 rows of the fully merged assessments table.
assesmentsCoursesRegStudentInfo.sample(10)

Unnamed: 0,code_module,code_presentation,id_student,date_registration,id_assessment,date_submitted,assessment_type,date,weight,module_presentation_length,gender,region,highest_education,imd_band,age_band,num_of_prev_attempts,studied_credits,disability
12705,BBB,2013B,559389,-24.0,14984,17,TMA,19.0,5.0,240,F,South Region,Lower Than A Level,80-90%,0-35,0,60,N
36179,BBB,2014B,628982,-30.0,15009,40,TMA,40.0,18.0,234,F,Wales,HE Qualification,0-10%,0-35,0,60,N
159136,GGG,2013J,352382,-53.0,37422,207,CMA,229.0,0.0,261,F,Yorkshire Region,A Level or Equivalent,70-80%,35-55,0,30,N
123142,FFF,2013J,573152,-30.0,34880,118,CMA,236.0,0.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
140226,FFF,2014B,630830,-29.0,34886,19,TMA,24.0,12.5,241,M,North Region,Lower Than A Level,,0-35,0,60,N
26167,BBB,2013J,606410,-22.0,15005,133,CMA,131.0,1.0,268,M,Ireland,Lower Than A Level,10-20,0-35,0,60,N
66389,DDD,2013B,283618,-17.0,25341,25,CMA,23.0,2.0,240,M,West Midlands Region,Lower Than A Level,90-100%,35-55,2,120,N
35309,BBB,2014B,625167,-29.0,15009,39,TMA,40.0,18.0,234,F,Wales,Lower Than A Level,10-20,0-35,0,60,N
20958,BBB,2013J,571681,-191.0,15006,168,CMA,166.0,1.0,268,F,Ireland,HE Qualification,0-10%,0-35,0,60,N
47998,CCC,2014B,485604,-54.0,24288,139,CMA,137.0,8.0,241,F,London Region,A Level or Equivalent,20-30%,0-35,0,30,N


In [186]:
# All rows of one student; the id 573152 is hard-coded
# (presumably picked from an earlier random sample - verify).
assesmentsCoursesRegStudentInfo[assesmentsCoursesRegStudentInfo["id_student"] == 573152]

Unnamed: 0,code_module,code_presentation,id_student,date_registration,id_assessment,date_submitted,assessment_type,date,weight,module_presentation_length,gender,region,highest_education,imd_band,age_band,num_of_prev_attempts,studied_credits,disability
123136,FFF,2013J,573152,-30.0,34873,27,TMA,19.0,12.5,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123137,FFF,2013J,573152,-30.0,34875,96,TMA,96.0,25.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123138,FFF,2013J,573152,-30.0,34876,138,TMA,131.0,25.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123139,FFF,2013J,573152,-30.0,34877,180,TMA,173.0,25.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123140,FFF,2013J,573152,-30.0,34878,49,CMA,236.0,0.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123141,FFF,2013J,573152,-30.0,34879,42,CMA,236.0,0.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123142,FFF,2013J,573152,-30.0,34880,118,CMA,236.0,0.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123143,FFF,2013J,573152,-30.0,34881,185,CMA,236.0,0.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123144,FFF,2013J,573152,-30.0,34882,185,CMA,236.0,0.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
123145,FFF,2013J,573152,-30.0,34883,234,CMA,236.0,0.0,268,M,Yorkshire Region,A Level or Equivalent,20-30%,0-35,0,60,N
