This notebook documents how I pulled information from an Internet Archive snapshot and used it to create a CSV of the representatives for the 1999-2003 term.

https://web.archive.org/web/20020724040306/http://www.nigeriacongress.org/reps/repslist.asp

In [62]:
import bs4
import pandas as pd

In [6]:
# Parse the locally saved copy of the archived page into a BeautifulSoup tree.
with open('../archive/The House Of Representatives of the Federal Republic of Nigeria.html') as f:
  # Need to delete one character in original html file!
  # NOTE(review): no parser is named here, so bs4 chooses one from whatever is
  # installed; the resulting tree may differ between environments — consider pinning one.
  soup = bs4.BeautifulSoup(f)

# Render the parsed document as indented HTML to inspect its structure.
soup.prettify()

'<html data-lt-installed="true">\n <!-- #BeginTemplate "/Templates/reps.dwt" -->\n <head>\n  <!-- is_embed=False -->\n  <script>\n   const observer = new PerformanceObserver((list) => {\n    list.getEntries().forEach((entry) => {\n      console.log(\'%o\', entry);\n    })\n  });\n  observer.observe({type: "navigation", buffered: true});\n  </script>\n  <script src="The%20House%20Of%20Representatives%20of%20the%20Federal%20Republic%20of%20Nigeria_files/athena.js" type="text/javascript">\n  </script>\n  <script type="text/javascript">\n   window.addEventListener(\'DOMContentLoaded\',function(){var v=archive_analytics.values;v.service=\'wb\';v.server_name=\'wwwb-app28.us.archive.org\';v.server_ms=314;archive_analytics.send_pageview({});});\n  </script>\n  <script charset="utf-8" src="The%20House%20Of%20Representatives%20of%20the%20Federal%20Republic%20of%20Nigeria_files/bundle-playback.js" type="text/javascript">\n  </script>\n  <script charset="utf-8" src="The%20House%20Of%20Representati

In [13]:
# Locate the table that holds all the representatives: the first <table> whose
# cellspacing attribute is '3'. Still brittle (it relies on a layout attribute),
# but tables without a cellspacing attribute are now skipped instead of raising
# KeyError, and a missing match fails loudly instead of silently leaving the
# last table on the page in `representatives`.
representatives = soup.find('table', attrs={'cellspacing': '3'})
if representatives is None:
  raise ValueError("No <table cellspacing='3'> found; the page layout may have changed")

In [53]:
class RepInfo:
  """Plain record for one representative scraped from the archived page."""

  def __init__(self, name, party, state, constituency):
    # Store the four fields exactly as they were passed in.
    self.name, self.party = name, party
    self.state, self.constituency = state, constituency

  def __str__(self):
    # One-line, human-readable summary used when printing a record.
    summary = f'{self.name} ({self.party}): {self.constituency} in {self.state}'
    return summary

In [61]:
# Build one RepInfo per <p> element inside the representatives table.
rep_infos = []
for representative in representatives.find_all('p'):
  # Join the link's text nodes with a space and split on any run of whitespace.
  # Splitting on a single ' ' left empty tokens when the source had repeated
  # spaces, which showed up as a leading space in the rebuilt name.
  # NOTE(review): the separator assumes run-together names such as
  # 'JosiahGobum' come from adjacent text nodes inside the link — confirm
  # against the HTML.
  name_parts = representative.find('a').get_text(' ', strip=True).split()
  # The first token is moved to the end of the name.
  rep_name = ' '.join(name_parts[1:] + name_parts[:1])

  # Look the <font> tags up once; the fields are addressed by position.
  fonts = representative.find_all('font')
  state = fonts[3].text

  # The sixth <font> holds the constituency, then the word 'Political', then
  # the party after a colon.
  details = fonts[5].text
  constituency = details[:details.index('Political')].strip()
  party = details[(details.index(':') + 1):].strip()

  rep_infos.append(RepInfo(rep_name, party, state, constituency))

# Quick sanity check of the first few parsed records.
for rep in rep_infos[:5]:
  print(rep)

JosiahGobum Binuwai (PDP): Pankshin/Kanke/Kanam in Plateau State
IbrahimBio Isa (APP): Baruten/Kaiama in Kwara State
 LawrenceOlufemi Kehinde (AD): Ayedire/Iwo/Ola-Oluwa in Osun State
TonyeTamuno Longjohn (PDP): Bonny/Degema in Rivers State
 IdowuRufus Oluwatayo (AD): Obokun/Oriade in Osun State


In [65]:
# Build the frame in one go from a list of records. Growing it with pd.concat
# inside the loop copies the whole frame on every iteration and concatenates
# onto an empty frame. Passing `columns` keeps the column order fixed (and the
# header present) even if rep_infos is empty.
reps = pd.DataFrame(
  [
    {'Name': rep.name, 'Party': rep.party, 'State': rep.state, 'Constituency': rep.constituency}
    for rep in rep_infos
  ],
  columns=['Name', 'Party', 'State', 'Constituency'],
)

# Save the scraped table without the index column.
reps.to_csv('../archive/online-1999.csv', index=False)

Need to add whitespace in names where necessary: some scraped names have parts run together (e.g. `JosiahGobum Binuwai` in the output above).

Also corrected Okwudili Uzoka's State

In [96]:
# Reload the representatives table from the CSV in the archive folder.
reps = pd.read_csv('../archive/online-1999.csv')

In [97]:
# Order the rows by the State column and display the result.
reps = reps.sort_values('State')
reps

Unnamed: 0,Name,Party,State,Constituency
94,Nwakanma Chimaobi,PDP,Abia State,Isiala Ngwa North/South
95,Macebuh Chinonyerem,PDP,Abia State,Ukwa East/Ukwa west
119,Anthony Eze Enwereuzor,APP,Abia State,Aba North/Aba South
200,Uchechukwu N. Maduako,PDP,Abia State,Isuikwuato/Umunneochi
232,Njoku Nnamdi,PDP,Abia State,Bende
...,...,...,...,...
141,Lawali Ibrahim Nasarawa Godal,APP,Zamfara State,Kaura Namoda/Birnin Magaji
16,Makwashi Abubakar,APP,Zamfara State,Bakura/Maradun
41,Sahabi Aliyu,APP,Zamfara State,Gummi/Bukkuyum
296,Anka Mohammed Sani,PDP,Zamfara State,Anka/Mafara


In [98]:
# Write the state-sorted table to the csv folder, leaving out the index.
reps.to_csv('../csv/online-1999.csv', index=False)

In [91]:
# Load the second table for the same assembly (1999 - 2003) to compare against
# the scraped data, and display it.
book_ref = pd.read_csv('../csv/assemblies/img_house_1999 - 2003.csv')
book_ref

Unnamed: 0,Name,Party,Constituency,Date of Birth,Education,State
0,Enwereuzor Eze Anthony,APP,Aba North/South,18/08/1951,(MBA) Finance,Abia
1,Ohuabunwa M. Arukwe,PDP,Arochukwu/Ohafia,24/05/1957,HND/Bachelor of Technology in Microbiology Bsc.,Abia
2,Macebuh Chinonyerem,PDP,Ukwa East/Ukwa West,8/07/1956,Bachelor Engineering,Abia
3,Iheanacho Obioma,PDP,Ikwuano/Umuhia North/South,17/04/1960,Bachelor Degree in Econs,Abia
4,Nwakanma Chimaobi,PDP,Isiala Ngwa North/ South,16/04/1944,Masters Degree in Marketing,Abia
...,...,...,...,...,...,...
352,Mohammed Sani,ANPP,Tsafe/Gusau,October 1951,Secondary School Cert.,Zamfara
353,Sahabi Aliyu,APP,Gummi/Bukkuyum,01/10/1960,Bachelor Degree in Ed (Arts),Zamfara
354,Sani Anka Mohammed,PDP,Anka/Mafara,04/08/1958,Diploma in Law,Zamfara
355,Yususf Usman B.,PDP,Kwali/ Gwagwalada/ Kuje and Abaji,09/09/1961,Master Degree in Agric. Business,Federal Capital Territory


In [92]:
def state_member_info(state, reps_state=None):
  """Print one state's members from both tables, each sorted by constituency.

  Reads the notebook-level frames `reps` (scraped list) and `book_ref`
  (reference list) and prints Name, Party and Constituency for each.

  Args:
    state: State label as it appears in `book_ref['State']`, e.g. 'Abia'.
    reps_state: Label to match in `reps['State']`. Defaults to
      f'{state} State'. Pass it explicitly when the scraped label does not
      follow that pattern, e.g. 'FCT, Abuja (Federal Capital  Territory)'
      for book_ref's 'Federal Capital Territory'.
  """
  columns = ['Name', 'Party', 'Constituency']
  if reps_state is None:
    reps_state = f'{state} State'
  # .loc selects rows and columns in one step instead of chained indexing.
  print(reps.loc[reps['State'] == reps_state, columns].sort_values('Constituency'))
  print(book_ref.loc[book_ref['State'] == state, columns].sort_values('Constituency'))

In [93]:
# Print the Abia members from both tables, one after the other, for comparison.
state_member_info('Abia')

                          Name Party                       Constituency
119     Anthony Eze Enwereuzor   APP                Aba North/Aba South
259       Arukwe Mao Ohuabunwa   PDP                   Arochukwu/Ohafia
232               Njoku Nnamdi   PDP                              Bende
94           Nwakanma Chimaobi   PDP            Isiala Ngwa North/South
200      Uchechukwu N. Maduako   PDP              Isuikwuato/Umunneochi
258  Clifford Ezekesiri Ohiagu   PDP         Obingwa/Ugwunagbo/Osisioma
95         Macebuh Chinonyerem   PDP                Ukwa East/Ukwa west
243           Iheanacho Obioma   PDP  Umuahia/North South/Ikwu/Ikwuhado
                     Name Party                 Constituency
0  Enwereuzor Eze Anthony   APP              Aba North/South
1     Ohuabunwa M. Arukwe   PDP             Arochukwu/Ohafia
5            Njoku Nnamdi   PDP                        Bende
3        Iheanacho Obioma   PDP   Ikwuano/Umuhia North/South
4       Nwakanma Chimaobi   PDP     Isiala Ngwa

In [94]:
# List the distinct State labels present in the scraped table.
reps['State'].unique()

array(['Abia State', 'Adamawa State', 'Akwa Ibom State', 'Anambra State',
       'Bauchi State', 'Bayelsa State', 'Benue State', 'Borno State',
       'Cross River State', 'Delta State', 'Ebonyi State', 'Edo State',
       'Ekiti State', 'Enugu State',
       'FCT, Abuja (Federal Capital  Territory)', 'Gombe State',
       'Imo State', 'Jigawa State', 'Kaduna State', 'Kano State',
       'Katsina State', 'Kebbi State', 'Kogi State', 'Kwara State',
       'Lagos State', 'Nassarawa State', 'Niger State', 'Ogun State',
       'Ondo State', 'Osun State', 'Oyo State', 'Plateau State',
       'Rivers State', 'Sokoto State', 'Taraba State', 'Yobe State',
       'Zamfara State'], dtype=object)