# MGSE ICCR: Data Wrangling (Australian Lexicon)


Intro:
Just grab the terms, wrangle and export as csv for visualisation


Date: 30/1/2020

Version: 0.01

Environment: Python 3.7.6 and Jupyter notebook

Libraries used: 
    pandas, re, string, csv

In [1]:
# Import the libraries needed to read and report on data files
import pandas as pd
import re
import numpy as np
import string

In [2]:
colNames= ('term', 'descr', 'egs')
df = pd.read_csv("AusTerms.csv",names=colNames,skiprows=1)

# df.dropna(how='all', inplace=True)
df.head(62)

Unnamed: 0,term,descr,egs
0,answering questions,The activity of responding to a question or a ...,Example:
1,,,The teacher asks: “What’s the area of a triang...
2,,,A student responds: “Eighty square centimetres.”
3,,,Another student responds: “Forty square centim...
4,,,
...,...,...,...
57,,,Copying solutions from the board into a workbook.
58,,,
59,defining,"The teacher or student gives a clear meaning, ...",Example:
60,(giving a definition),,The teacher identifies the characteristics of ...


In [3]:
# the next row of a term can be an alternative term (if next is notna )
dfConcats = df
dfConcats['SameTerm'] =  (dfConcats.term.notna()) &(dfConcats.term.shift(-1).notna())
# show terms with an alternative 
dfConcats[((dfConcats.term.notna()) &(dfConcats.term.shift(-1).notna()))]

                                 

Unnamed: 0,term,descr,egs,SameTerm
59,defining,"The teacher or student gives a clear meaning, ...",Example:,True
72,differentiating,"Any action in which instruction is modified, a...",Examples:,True
106,engaging,A student is actively involved with an educati...,Examples:,True
113,explaining,Make an idea or situation clear to someone by ...,Example:,True


In [4]:
# set the Alt description
dfConcats['alt'] = np.nan
dfConcats.loc[dfConcats.SameTerm==True,'alt'] = dfConcats.term.shift(-1)

dfConcats.head(62)


Unnamed: 0,term,descr,egs,SameTerm,alt
0,answering questions,The activity of responding to a question or a ...,Example:,False,
1,,,The teacher asks: “What’s the area of a triang...,False,
2,,,A student responds: “Eighty square centimetres.”,False,
3,,,Another student responds: “Forty square centim...,False,
4,,,,False,
...,...,...,...,...,...
57,,,Copying solutions from the board into a workbook.,False,
58,,,,False,
59,defining,"The teacher or student gives a clear meaning, ...",Example:,True,(giving a definition)
60,(giving a definition),,The teacher identifies the characteristics of ...,False,


In [5]:
dfConcats.loc[dfConcats.SameTerm.shift()==True,'term']

60     (giving a definition)
73         (differentiation)
107             (engagement)
114            (explanation)
Name: term, dtype: object

In [6]:
# then null out the ones that aren't actually alt
dfConcats.loc[dfConcats.SameTerm.shift()==True,'term'] =np.nan
dfConcats.head(62)

Unnamed: 0,term,descr,egs,SameTerm,alt
0,answering questions,The activity of responding to a question or a ...,Example:,False,
1,,,The teacher asks: “What’s the area of a triang...,False,
2,,,A student responds: “Eighty square centimetres.”,False,
3,,,Another student responds: “Forty square centim...,False,
4,,,,False,
...,...,...,...,...,...
57,,,Copying solutions from the board into a workbook.,False,
58,,,,False,
59,defining,"The teacher or student gives a clear meaning, ...",Example:,True,(giving a definition)
60,,,The teacher identifies the characteristics of ...,False,


In [7]:
df = dfConcats
# fill in terms for further grouping
df['term'].fillna(method='ffill',inplace=True)

df['term'] = df['term'].apply(lambda x: x.strip())
df.head()

Unnamed: 0,term,descr,egs,SameTerm,alt
0,answering questions,The activity of responding to a question or a ...,Example:,False,
1,answering questions,,The teacher asks: “What’s the area of a triang...,False,
2,answering questions,,A student responds: “Eighty square centimetres.”,False,
3,answering questions,,Another student responds: “Forty square centim...,False,
4,answering questions,,,False,


In [8]:
df[df['term']=="applying"]

Unnamed: 0,term,descr,egs,SameTerm,alt
5,applying,An activity in which a taught procedure or con...,Examples:,False,
6,applying,,Having been taught Pythagoras theorem using tw...,False,
7,applying,,Having been taught strategies for solving simu...,False,
8,applying,,,False,
9,applying,,Non-example:,False,
10,applying,,Solving a pair of simultaneous equations after...,False,
11,applying,,,False,


In [9]:
# turn nan into spaces
df['descr'] = df['descr'].replace(np.nan,' ',regex=True)
df['egs'] = df['egs'].replace(np.nan,' ',regex=True)
df['alt'] = df['alt'].replace(np.nan,' ',regex=True)
df.head()

Unnamed: 0,term,descr,egs,SameTerm,alt
0,answering questions,The activity of responding to a question or a ...,Example:,False,
1,answering questions,,The teacher asks: “What’s the area of a triang...,False,
2,answering questions,,A student responds: “Eighty square centimetres.”,False,
3,answering questions,,Another student responds: “Forty square centim...,False,
4,answering questions,,,False,


In [10]:
df[df['term']=="checking"]

Unnamed: 0,term,descr,egs,SameTerm,alt
34,checking,The process by which a teacher or student,Examples:,False,
35,checking,,The teacher makes notes in her chronicle to in...,False,
36,checking,"• checks answers, by determining the exactness...",The teacher makes a mental note or observation.,False,
37,checking,"• checks progress, by determining whether the ...",,False,
38,checking,,Non-example:,False,
39,checking,,Students annotate their workbook solutions.,False,


In [11]:
# concatenate examples split across rows
groupDescr = df.groupby('term')['descr'].apply(''.join)
groupDescr

term
(use of a) hook           The engaging introduction of a topic or sub-to...
answering questions       The activity of responding to a question or a ...
applying                  An activity in which a taught procedure or con...
assessment                An activity undertaken by teacher or students ...
assigning homework        The teacher assigns tasks to be completed outs...
                                                ...                        
summative assessment      Information is collected for the purpose of su...
test/testing              A situation in which individuals are required ...
wait time                 A deliberate pause before or after a question ...
whole class discussion    An activity in which the teacher and students ...
worked example            The teacher (or student) writes out the steps ...
Name: descr, Length: 61, dtype: object

In [12]:
groupEgs = df.groupby('term')['egs'].apply(' '.join)
groupEgs

term
(use of a) hook           Example: The teacher introduces polyhedra with...
answering questions       Example: The teacher asks: “What’s the area of...
applying                  Examples: Having been taught Pythagoras theore...
assessment                Examples: The teacher administers a test. The ...
assigning homework        Example: The teacher writes the homework on th...
                                                ...                        
summative assessment      Examples: Students complete a test that measur...
test/testing              Examples: Students complete a complex problem ...
wait time                 Example: The teacher says: “What is the area o...
whole class discussion    Example: The teacher invites students to share...
worked example            Example: The teacher writes out the solution t...
Name: egs, Length: 61, dtype: object

In [13]:
dfSumm = df[['term','alt']].drop_duplicates()
dfSumm.head(20)

Unnamed: 0,term,alt
0,answering questions,
5,applying,
12,assessment,
20,assigning homework,
26,board work,
34,checking,
40,clarifying,
46,collecting work,
52,correcting,
59,defining,(giving a definition)


In [14]:
df = pd.merge(dfSumm,groupDescr,on='term')
df = pd.merge(df,groupEgs,on='term')
df

Unnamed: 0,term,alt,descr,egs
0,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...
1,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...
2,assessment,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...
3,assigning homework,,The teacher assigns tasks to be completed outs...,Example: The teacher writes the homework on th...
4,board work,,"The teacher or students record workings, diagr...",Examples: A student solves a problem on the bo...
...,...,...,...,...
60,summative assessment,,Information is collected for the purpose of su...,Examples: Students complete a test that measur...
61,test/testing,,A situation in which individuals are required ...,Examples: Students complete a complex problem ...
62,wait time,,A deliberate pause before or after a question ...,Example: The teacher says: “What is the area o...
63,whole class discussion,,An activity in which the teacher and students ...,Example: The teacher invites students to share...


In [15]:
# Grab the author categories
colNames = ('catdescr','term')
dfCat = pd.read_csv("AusCategories.csv",names=colNames,skiprows=1)
dfCat = dfCat.dropna().reset_index(drop=True)
dfCat

Unnamed: 0,catdescr,term
0,Assessment,"assessment, correcting, elicit understanding, ..."
1,Classroom Management,"disciplining, encouraging, giving praise, moni..."
2,Learning Strategies,"answering questions, applying, board work, che..."
3,Teaching Strategies,"answering questions, applying, assigning homew..."


In [17]:
# keep the index as the id for the category
dfCat['cat'] = ['cat'+str(int(x)) for x in dfCat.index]
dfCat

Unnamed: 0,catdescr,term,cat
0,Assessment,"assessment, correcting, elicit understanding, ...",cat0
1,Classroom Management,"disciplining, encouraging, giving praise, moni...",cat1
2,Learning Strategies,"answering questions, applying, board work, che...",cat2
3,Teaching Strategies,"answering questions, applying, assigning homew...",cat3


In [18]:
# explode terms into one row for each term for each category
dfCat.term = [x.split(',') for x in dfCat.term]
dfCat = dfCat.explode('term').reset_index(drop=True)
dfCat.term = [x.strip() for x in dfCat.term]
dfCat

Unnamed: 0,catdescr,term,cat
0,Assessment,assessment,cat0
1,Assessment,correcting,cat0
2,Assessment,elicit understanding,cat0
3,Assessment,feedback,cat0
4,Assessment,formative assessment,cat0
...,...,...,...
88,Teaching Strategies,student (individual) work,cat3
89,Teaching Strategies,summarising,cat3
90,Teaching Strategies,wait time,cat3
91,Teaching Strategies,whole class discussion,cat3


In [19]:
colNames= ['term', 'descr', 'cat']
df = pd.merge(df,dfCat,on='term')
#df = df[colNames]
df.head(20)

Unnamed: 0,term,alt,descr,egs,catdescr,cat
0,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,Learning Strategies,cat2
1,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,Teaching Strategies,cat3
2,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,Learning Strategies,cat2
3,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,Teaching Strategies,cat3
4,assessment,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...,Assessment,cat0
5,assigning homework,,The teacher assigns tasks to be completed outs...,Example: The teacher writes the homework on th...,Teaching Strategies,cat3
6,board work,,"The teacher or students record workings, diagr...",Examples: A student solves a problem on the bo...,Learning Strategies,cat2
7,board work,,"The teacher or students record workings, diagr...",Examples: A student solves a problem on the bo...,Teaching Strategies,cat3
8,checking,,The process by which a teacher or student • ch...,Examples: The teacher makes notes in her chron...,Learning Strategies,cat2
9,checking,,The process by which a teacher or student • ch...,Examples: The teacher makes notes in her chron...,Teaching Strategies,cat3


In [20]:
df['egs'][10]

'Example: The teacher comments on each written line of a worked solution on the board.   Non-example: Repeating a statement made previously.  '

In [21]:
colnames = df.columns
colnames

Index(['term', 'alt', 'descr', 'egs', 'catdescr', 'cat'], dtype='object')

In [22]:

# create dodge columns for visualisation code
df['amount'] = np.random.randint(8,12,len(df.index))
df['id'] = df.index
# reorder columsn for visualisation code
colnames = ['id','term', 'catdescr','alt', 'descr', 'egs',  'cat','amount']
df = df[colnames]
df.head()

Unnamed: 0,id,term,catdescr,alt,descr,egs,cat,amount
0,0,answering questions,Learning Strategies,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat2,11
1,1,answering questions,Teaching Strategies,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat3,8
2,2,applying,Learning Strategies,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,cat2,10
3,3,applying,Teaching Strategies,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,cat3,11
4,4,assessment,Assessment,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...,cat0,9


In [26]:
df.iloc[20:30,:]

Unnamed: 0,id,term,catdescr,alt,descr,egs,cat,amount
20,20,differentiating,Teaching Strategies,,"Any action in which instruction is modified, a...",Examples: The teacher groups students for inst...,cat3,10
21,21,disciplining,Classroom Management,,The teacher identifies undesirable behaviour t...,Example: A teacher stops her activity and asks...,cat1,10
22,22,elaborating,Teaching Strategies,,A teacher or student provides additional infor...,Example: The teacher shades the fraction one-h...,cat3,10
23,23,elicit understanding,Assessment,,An activity undertaken by the teacher or stude...,Examples: The teacher asks a student to demons...,cat0,10
24,24,elicit understanding,Teaching Strategies,,An activity undertaken by the teacher or stude...,Examples: The teacher asks a student to demons...,cat3,8
25,25,encouraging,Classroom Management,,An action undertaken by the teacher for the pu...,"Examples: A teacher comments: ""Keep trying."" ""...",cat1,10
26,26,encouraging,Teaching Strategies,,An action undertaken by the teacher for the pu...,"Examples: A teacher comments: ""Keep trying."" ""...",cat3,11
27,27,engaging,Learning Strategies,(engagement),A student is actively involved with an educati...,Examples: A student keeps working on solving a...,cat2,11
28,28,engaging,Teaching Strategies,(engagement),A student is actively involved with an educati...,Examples: A student keeps working on solving a...,cat3,8
29,29,engaging,Learning Strategies,,A student is actively involved with an educati...,Examples: A student keeps working on solving a...,cat2,8


In [24]:
df.to_csv('AusData.csv',index=False)

In [194]:
# dodge data hack for bubble chart viz
# df[['Term','Views']].to_csv('data_fdi.csv',index=False)

In [28]:
df[['catdescr','cat']].drop_duplicates()

Unnamed: 0,catdescr,cat
0,Learning Strategies,cat2
1,Teaching Strategies,cat3
4,Assessment,cat0
21,Classroom Management,cat1


In [30]:
df[['catdescr','cat']].drop_duplicates().to_csv('AusCatData.csv',index=False)