In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# modules such as projects_parser (imported below) take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd

import projects_parser

In [3]:
# Location of the 2020 booklet workbook. The path is relative, so it resolves
# against the kernel's current working directory.
BOOKLET_FILE = '../booklets_text/2020.xlsx'

In [4]:
# Load the booklet workbook into a DataFrame.
# NOTE(review): with read_excel's default header=0 the first sheet row is used
# as the column labels rather than as data — confirm that row is blank in this
# workbook before reusing the cell on another booklet.
df = pd.read_excel(io=BOOKLET_FILE)

In [5]:
# Display the frame exactly as loaded, before any cleaning.
df

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3
0,Project Title A Simple So...,,,
1,Project Title Agent Based...,,,
2,,,,
3,,,,
4,,,,
...,...,...,...,...
3771,,,,
3772,,,,
3773,,,,
3774,Project Title 2D to 3D bo...,,,


In [6]:
# Keep only the rows and the columns that hold at least one value, renumber
# the remaining rows from zero, then label the single surviving column 0.
normalized_df = (
    df
    .dropna(axis=0, how='all')
    .dropna(axis=1, how='all')
    .reset_index(drop=True)
)
# Assigning a one-element label list raises if more than one column survived.
normalized_df.columns = [0]

In [7]:
# Display the cleaned frame: one column (labelled 0), one row per non-empty entry.
normalized_df

Unnamed: 0,0
0,Project Title A Simple So...
1,Project Title Agent Based...
2,Project Title AudioFlow\n...
3,Project Title BioRhythmic...
4,Project Title Chirp – Aud...
...,...
170,Project Title The Analysi...
171,Project Title Under-water...
172,Project Title VERA – Vale...
173,Project Title 2D to 3D bo...


In [8]:
# Turn the frame into a plain Python list of rows (each row is itself a list
# of that row's cell values).
data_list = normalized_df.to_numpy().tolist()

In [9]:
# Show the first three rows, then the total number of rows on its own line.
print(data_list[:3], len(data_list), sep='\n')

[['Project Title                      A Simple Society – Game Theory\nName                               Jamie Hyland\nEmail                                jamie.hyland26@mail.dcu.ie\nName                               Alex Thornberry\nEmail                                alex.thornberry3@mail.dcu.ie\nProgramme                       Computer Applications\nSupervisor                        alistair.sutherland@dcu.ie\nThis project aims to find the optimal strategies for agents to employ in order to survive. Agents will be passive or aggressive to simulate a predator/prey scenario. The system will study the key concepts\nfound in Game Theory such as cooperation, deception, sharing of resources and so on. Users also have the ability to change key aspects of agents behavior and their surroundings and watch them react in real-time.\nProject Video                    shorturl.at/rxEH4\nProject Area                      3-D Modelling, Artificial Intelligence, Gaming, Simulation Project Technolo

In [10]:
# Unwrap each row: keep only its first cell, giving a flat list of project strings.
projects_strings_raw = [row[0] for row in data_list]

In [11]:
# Show the first three raw project strings, then how many there are in total.
print(projects_strings_raw[:3], len(projects_strings_raw), sep='\n')

['Project Title                      A Simple Society – Game Theory\nName                               Jamie Hyland\nEmail                                jamie.hyland26@mail.dcu.ie\nName                               Alex Thornberry\nEmail                                alex.thornberry3@mail.dcu.ie\nProgramme                       Computer Applications\nSupervisor                        alistair.sutherland@dcu.ie\nThis project aims to find the optimal strategies for agents to employ in order to survive. Agents will be passive or aggressive to simulate a predator/prey scenario. The system will study the key concepts\nfound in Game Theory such as cooperation, deception, sharing of resources and so on. Users also have the ability to change key aspects of agents behavior and their surroundings and watch them react in real-time.\nProject Video                    shorturl.at/rxEH4\nProject Area                      3-D Modelling, Artificial Intelligence, Gaming, Simulation Project Technolog

In [12]:
# Pass every raw project string through projects_parser.normalize_characters,
# which transforms certain characters.
projects_strings = list(map(projects_parser.normalize_characters, projects_strings_raw))

In [13]:
# Show the first three normalized project strings, then how many there are in total.
print(projects_strings[:3], len(projects_strings), sep='\n')

['Project Title                      A Simple Society – Game Theory\nName                               Jamie Hyland\nEmail                                jamie.hyland26@mail.dcu.ie\nName                               Alex Thornberry\nEmail                                alex.thornberry3@mail.dcu.ie\nProgramme                       Computer Applications\nSupervisor                        alistair.sutherland@dcu.ie\nThis project aims to find the optimal strategies for agents to employ in order to survive. Agents will be passive or aggressive to simulate a predator/prey scenario. The system will study the key concepts\nfound in Game Theory such as cooperation, deception, sharing of resources and so on. Users also have the ability to change key aspects of agents behavior and their surroundings and watch them react in real-time.\nProject Video                    shorturl.at/rxEH4\nProject Area                      3-D Modelling, Artificial Intelligence, Gaming, Simulation Project Technolog

In [14]:
# Maps each output field to a regex whose single capture group holds that
# field's raw text. Each pattern is delimited by the booklet's own labels
# ('Project Title', 'Name', 'Programme', ...).
# NOTE(review): the captured values shown in the parse output span newlines,
# so parse_projects presumably applies these patterns with re.DOTALL — confirm.
regex_schema = {
    'title': r'Project Title(.*?)Name',
    # Spans every Name/Email pair up to the 'Programme' label.
    'students': r'Name(.*?)Programme',
    # Lazy, like the other label-to-label fields. A greedy group would run on
    # to the LAST 'Supervisor' in the text, so a description that mentions the
    # word would drag the supervisor line and part of the description into
    # the programme.
    'programme': r'Programme(.*?)Supervisor',
    # Greedy: the capture runs to the last 'Project Video' in the text, so the
    # free-text description is taken whole.
    'supervisor_and_description': r'Supervisor(.*)Project Video',
    'video': r'Project Video(.*?)Project Area',
    'area': r'Project Area(.*?)Project Technology',
    # Everything after the final label, up to the end of the string.
    'technology': r'Project Technology(.*?)$',
}

In [15]:
# Apply regex_schema to every normalized project string. Per the output of the
# next cell, the result is a list with one dict per project, keyed by the
# schema's field names and holding the raw (untrimmed) captured text.
projects = projects_parser.parse_projects(projects_strings, regex_schema)

In [16]:
# Inspect the raw captures.
# NOTE(review): this displays the entire list; a slice such as projects[:3]
# would keep the saved notebook output small.
projects

[{'title': '                      A Simple Society – Game Theory\n',
  'students': '                               Jamie Hyland\nEmail                                jamie.hyland26@mail.dcu.ie\nName                               Alex Thornberry\nEmail                                alex.thornberry3@mail.dcu.ie\n',
  'programme': '                       Computer Applications\n',
  'supervisor_and_description': '                        alistair.sutherland@dcu.ie\nThis project aims to find the optimal strategies for agents to employ in order to survive. Agents will be passive or aggressive to simulate a predator/prey scenario. The system will study the key concepts\nfound in Game Theory such as cooperation, deception, sharing of resources and so on. Users also have the ability to change key aspects of agents behavior and their surroundings and watch them react in real-time.\n',
  'video': '                    shorturl.at/rxEH4\n',
  'area': '                      3-D Modelling, Artificial

In [17]:
# Turn the raw captures into their final shape. Per the output of the next
# cell, values come back trimmed, 'students' becomes a list of name/email
# dicts, and 'supervisor_and_description' is split into 'supervisor' and
# 'description'.
projects_final = projects_parser.canonicalize_projects(projects)

In [18]:
# Inspect the canonicalized projects.
# NOTE(review): this displays the entire list; a slice such as
# projects_final[:3] would keep the saved notebook output small.
projects_final

[{'title': 'A Simple Society – Game Theory',
  'students': [{'name': 'Jamie Hyland', 'email': 'jamie.hyland26@mail.dcu.ie'},
   {'name': 'Alex Thornberry', 'email': 'alex.thornberry3@mail.dcu.ie'}],
  'programme': 'Computer Applications',
  'video': 'shorturl.at/rxEH4',
  'area': '3-D Modelling, Artificial Intelligence, Gaming, Simulation',
  'technology': 'C#, Unity',
  'supervisor': 'alistair.sutherland@dcu.ie',
  'description': 'This project aims to find the optimal strategies for agents to employ in order to survive. Agents will be passive or aggressive to simulate a predator/prey scenario. The system will study the key concepts found in Game Theory such as cooperation, deception, sharing of resources and so on. Users also have the ability to change key aspects of agents behavior and their surroundings and watch them react in real-time.'},
 {'title': 'Agent Based Model to Simulate the Effects of Inequality on Crime',
  'students': [{'name': 'Oishin Smith', 'email': 'oishin.smith25@

In [19]:
# Hand the canonicalized projects to projects_parser.write_json under the
# label '2020'.
# NOTE(review): the output location and file naming are decided inside
# write_json, which is not visible here — confirm where the file lands.
projects_parser.write_json('2020', projects_final)