In [1]:
import pandas as pd
import numpy as np

In [2]:
users_df = pd.read_csv('users.csv')
users_df

Unnamed: 0,user_id,first_name,last_name,house
0,1,Harry,Potter,Gryffindor
1,2,Ron,Wesley,Gryffindor
2,3,Hermonie,Granger,Gryffindor
3,4,Ginny,Weasley,Gryffindor
4,5,Draco,Malfoy,Slytherin
5,6,Tom,Riddle,Slytherin
6,7,Luna,Lovegood,Ravenclaw
7,8,Cho,Chang,Ravenclaw
8,9,Cedric,Diggory,Hufflepuff


In [3]:
users_df['full_name'] = users_df['first_name'] +' ' +users_df['last_name']

In [5]:
follows_df = pd.read_csv('follows.csv')
follows_df

Unnamed: 0,user_id,follows,date
0,1,2,1993-09-01
1,2,1,1989-01-01
2,3,1,1993-07-01
3,2,3,1994-10-10
4,3,2,1995-03-01
5,4,2,1988-08-08
6,4,1,1988-08-08
7,1,4,1994-04-02
8,1,5,2000-01-01
9,5,1,2000-01-02


In [6]:
# How many users are there in each house?
users_df['house'].value_counts()

Gryffindor    4
Slytherin     2
Ravenclaw     2
Hufflepuff    1
Name: house, dtype: int64

In [7]:
#List all links that were created before September 1st, 1993
lst_follows_after_sept1_93 = []
for index, row in follows_df.iterrows():
    if row['date'] < '1993-09-01':
        lst_follows_after_sept1_93.append(row)
        

len(lst_follows_after_sept1_93) 
links_df = pd.DataFrame(lst_follows_after_sept1_93)
links_df

Unnamed: 0,user_id,follows,date
1,2,1,1989-01-01
2,3,1,1993-07-01
5,4,2,1988-08-08
6,4,1,1988-08-08
10,5,6,1986-01-10
11,7,1,1990-02-02


In [8]:
#List all the links established before September 1st 1993, but this time use the users names

merged_df = users_df.merge(follows_df[['follows', 'date', 'user_id']], how='outer')
merged_df.drop(['first_name', 'last_name', 'house'], axis=1, inplace=True)


user_names = dict(zip(merged_df.user_id, merged_df.full_name))
merged_df.follows.replace(user_names, inplace=True)
merged_df.drop(['user_id'], axis=1, inplace=True)
merged_df_before_sept_93 = merged_df.loc[merged_df['date'] < '1993-09-01']

In [11]:
#Give a count of how many people followed each user as of 1999-12-31. Give the result in term of "users full name, number of followers
mask = merged_df['date'] > '1999-12-31'
merged_count = merged_df.loc[mask].groupby([merged_df['follows'], merged_df['date']]).size().reset_index(name='count')


In [12]:
#List all rows where someone from one house follows someone from a different house.
dict_houses = {'Harry Potter': 'Gryffindor', 'Ron Wesley': 'Gryffindor', 'Hermonie Granger': 'Gryffindor', 'Ginny Weasley': 'Gryffindor', 'Draco Malfoy': 'Slytherin', 'Tom Riddle': 'Slytherin', 'Luna Lovegood': 'Ravenclaw', 'Cho Chang': 'Ravenclaw', 'Cedric Diggory': 'Hufflepuff'}
merged_df['full_name_house'] = merged_df['full_name'].map(dict_houses)
merged_df['followee_house'] = merged_df['follows'].map(dict_houses)
df_diff_houses = merged_df.loc[merged_df['full_name_house'] != merged_df['followee_house']]


Unnamed: 0,full_name,follows,date,full_name_house,followee_house
0,Harry Potter,Ron Wesley,1993-09-01,Gryffindor,Gryffindor
1,Harry Potter,Ginny Weasley,1994-04-02,Gryffindor,Gryffindor
2,Harry Potter,Draco Malfoy,2000-01-01,Gryffindor,Slytherin
3,Harry Potter,Luna Lovegood,1996-10-01,Gryffindor,Ravenclaw
4,Harry Potter,Cho Chang,1993-09-03,Gryffindor,Ravenclaw
5,Ron Wesley,Harry Potter,1989-01-01,Gryffindor,Gryffindor
6,Ron Wesley,Hermonie Granger,1994-10-10,Gryffindor,Gryffindor
7,Hermonie Granger,Harry Potter,1993-07-01,Gryffindor,Gryffindor
8,Hermonie Granger,Ron Wesley,1995-03-01,Gryffindor,Gryffindor
9,Hermonie Granger,Cedric Diggory,1996-05-30,Gryffindor,Hufflepuff


In [23]:
#List all unrequited followings (i.e. where A follows B but B does not follow A)


unrequited = []
for index, row in merged_df.iterrows():
    followee = row["follows"]
    follower = row["full_name"]
    
    # search in the dataframe if the reverse relationship exists
    reverse = merged_df[(merged_df["follows"] == follower) & (merged_df["full_name"] == followee)]
    
    # if the reverse relationship does not exist, append the followee and follower to the results list
    if len(reverse) == 0:
        unrequited.append([followee, follower])
        
print(unrequited)

[['Harry Potter', 'Hermonie Granger'], ['Cedric Diggory', 'Hermonie Granger'], ['Ron Wesley', 'Ginny Weasley'], ['Cedric Diggory', 'Ginny Weasley'], ['Tom Riddle', 'Draco Malfoy'], [nan, 'Tom Riddle'], ['Cho Chang', 'Luna Lovegood']]
