Roll No.: 215229145<br>Name.: Yokeshwaran G

# Lab 5:Retrieving the user’s LinkedIn Profile and analysing the profile’s connections

#**Profile Analysis**

In [1]:
import os
import csv
import numpy as np

# Point this to your 'Connections.csv' file.
CSV_FILE = os.path.join('resources', 'ch04-linkedin', 'Connections.csv')

csvReader = csv.DictReader(open("Connections.csv"), delimiter=',', quotechar='"')
contacts = [row for row in csvReader]

### Simple normalization of company suffixes from address book data


In [2]:
from prettytable import PrettyTable # pip install prettytable
from collections import Counter
from operator import itemgetter

# Define a set of transforms that converts the first item
# to the second item. Here, we're simply handling some
# commonly known abbreviations, stripping off common suffixes, 
# etc.

transforms = [(', Inc.', ''), (', Inc', ''), (', LLC', ''), (', LLP', ''),
               (' LLC', ''), (' Inc.', ''), (' Inc', '')]

companies = [c['Company'].strip() for c in contacts if c['Company'].strip() != '']

for i, _ in enumerate(companies):
    for transform in transforms:
        companies[i] = companies[i].replace(*transform)

pt = PrettyTable(field_names=['Company', 'Freq'])
pt.align = 'l'
c = Counter(companies)

[pt.add_row([company, freq]) for (company, freq) in sorted(c.items(), key=itemgetter(1), reverse=True) if freq > 1]

print(pt)


+--------------------------------------------------+------+
| Company                                          | Freq |
+--------------------------------------------------+------+
| Bishop Heber College, Tiruchirappalli - 620 017. | 9    |
+--------------------------------------------------+------+


### Standardizing common job titles and computing their frequencies

In [3]:
transforms = [
    ('Sr.', 'Senior'),
    ('Sr', 'Senior'),
    ('Jr.', 'Junior'),
    ('Jr', 'Junior'),
    ('CEO', 'Chief Executive Officer'),
    ('COO', 'Chief Operating Officer'),
    ('CTO', 'Chief Technology Officer'),
    ('CFO', 'Chief Finance Officer'),
    ('VP', 'Vice President'),
    ]

# Read in a list of titles and split apart
# any combined titles like "President/CEO."
# Other variations could be handled as well, such
# as "President & CEO", "President and CEO", etc.

titles = []
for contact in contacts:
    titles.extend([t.strip() for t in contact['Position'].split('/')
                  if contact['Position'].strip() != ''])

# Replace common/known abbreviations

for i, _ in enumerate(titles):
    for transform in transforms:
        titles[i] = titles[i].replace(*transform)

# Print out a table of titles sorted by frequency

pt = PrettyTable(field_names=['Job Title', 'Freq'])
pt.align = 'l'
c = Counter(titles)
[pt.add_row([title, freq]) 
 for (title, freq) in sorted(c.items(), key=itemgetter(1), reverse=True) 
     if freq > 1]
print(pt)

# Print out a table of tokens sorted by frequency

tokens = []
for title in titles:
    tokens.extend([t.strip(',') for t in title.split()])
pt = PrettyTable(field_names=['Token', 'Freq'])
pt.align = 'l'
c = Counter(tokens)
[pt.add_row([token, freq]) 
 for (token, freq) in sorted(c.items(), key=itemgetter(1), reverse=True) 
     if freq > 1 and len(token) > 2]
print(pt)

+---------------------+------+
| Job Title           | Freq |
+---------------------+------+
| Assistant Professor | 3    |
| Guest Lecturer      | 2    |
+---------------------+------+
+-----------+------+
| Token     | Freq |
+-----------+------+
| Professor | 5    |
| Associate | 4    |
| Data      | 3    |
| Assistant | 3    |
| Technical | 2    |
| Intern    | 2    |
| and       | 2    |
| Founder   | 2    |
| Business  | 2    |
| Guest     | 2    |
| Lecturer  | 2    |
| Student   | 2    |
| Trainee   | 2    |
+-----------+------+


### Geocoding locations with Google Maps

#### NOTE:This section contains no locations because there is no geocode generated

In [4]:
for i, c in enumerate(contacts):
    progress = '{0:3d} of {1:3d} - '.format(i+1,len(contacts))
    company = c['Company']
    try:
        location = g.geocode(company, exactly_one=True)
    except:
        print('... Failed to get a location for {0}'.format(company))
        location = None
    
    if location != None:
        c.update([('Location', location)])
        print(progress + company[:50] + ' -- ' + location.address)
    else:
        c.update([('Location', None)])
        print(progress + company[:50] + ' -- ' + 'Unknown Location')

... Failed to get a location for Synapse Business Systems Inc 
  1 of  68 - Synapse Business Systems Inc  -- Unknown Location
... Failed to get a location for Ford Motor Company
  2 of  68 - Ford Motor Company -- Unknown Location
... Failed to get a location for 
  3 of  68 -  -- Unknown Location
... Failed to get a location for 
  4 of  68 -  -- Unknown Location
... Failed to get a location for 
  5 of  68 -  -- Unknown Location
... Failed to get a location for 
  6 of  68 -  -- Unknown Location
... Failed to get a location for Bishop Heber College, Tiruchirappalli - 620 017.
  7 of  68 - Bishop Heber College, Tiruchirappalli - 620 017. -- Unknown Location
... Failed to get a location for 
  8 of  68 -  -- Unknown Location
... Failed to get a location for 
  9 of  68 -  -- Unknown Location
... Failed to get a location for 
 10 of  68 -  -- Unknown Location
... Failed to get a location for 
 11 of  68 -  -- Unknown Location
... Failed to get a location for Bishop Heber College, Tiruchi