# Creating the `Full_Friends_Lines` CSV  
#### Each row of this CSV holds all the lines one main character speaks in a single episode, joined into one string

### Imports

In [1]:
import pandas as pd
import numpy as np
import nltk
from sklearn.feature_extraction.text import CountVectorizer

# Text preprocessing steps - remove numbers, capital letters and punctuation
import re
import string

### Reading In Friends Data

In [2]:
friends = pd.read_csv('friends_quotes.csv')

In [3]:
s1e16 = pd.read_csv('s1e16.csv')

In [4]:
s9e11 = pd.read_csv('s9e11.csv')

In [5]:
s9e13 = pd.read_csv('s9e13.csv')

In [6]:
s4e24 = pd.read_csv('s4e24.csv')

In [7]:
s4e24

Unnamed: 0.1,Unnamed: 0,episode_number,season,episode_title,quote_order,author,quote
0,0,24,4,The One With Ross Wedding,1,Housekeeper,The Waltham Residence.
1,1,24,4,The One With Ross Wedding,2,Phoebe,Oh...yes..is this..umm..Emily’s Parents’ house.
2,2,24,4,The One With Ross Wedding,3,Housekeeper,This is the housekeeper speaking. And by the ...
3,3,24,4,The One With Ross Wedding,4,Phoebe,(In a British accent) This is Phoebe Buffay. ...
4,4,24,4,The One With Ross Wedding,5,Housekeeper,"Miss Waltham, is at the rehearsal dinner and ..."
...,...,...,...,...,...,...,...
223,223,24,4,The One With Ross Wedding,224,Minster,"Now Ross, repeat after me. I Ross..."
224,224,24,4,The One With Ross Wedding,225,Ross,I Ross...
225,225,24,4,The One With Ross Wedding,226,Minister,"Take thee, Emily..."
226,226,24,4,The One With Ross Wedding,227,Ross,"Take thee, Rachel...(All his friends have loo..."


In [8]:
friends

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season
0,Monica,1,Monica Gets A Roommate,There's nothing to tell! He's just some guy I ...,0,1
1,Joey,1,Monica Gets A Roommate,"C'mon, you're going out with the guy! There's ...",1,1
2,Chandler,1,Monica Gets A Roommate,"All right Joey, be nice. So does he have a hum...",2,1
3,Phoebe,1,Monica Gets A Roommate,"Wait, does he eat chalk?",3,1
4,Phoebe,1,Monica Gets A Roommate,"Just, 'cause, I don't want her to go through w...",4,1
...,...,...,...,...,...,...
59827,Chandler,17,"The Last One, Part I & II","Oh, it's gonna be okay.",581,10
59828,Rachel,17,"The Last One, Part I & II",(crying) Do you guys have to go to the new hou...,582,10
59829,Monica,17,"The Last One, Part I & II",We got some time.,583,10
59830,Rachel,17,"The Last One, Part I & II","Okay, should we get some coffee?",584,10


In [9]:
# Append the four extra frames read above (s1e16, s9e11, s9e13, s4e24) to the
# main quotes table; ignore_index=True renumbers the combined rows from 0.
# The cell rebinds `friends` to its own output, so re-running it appends the
# extra frames a second time.
friends = pd.concat([friends, s1e16, s9e11, s9e13, s4e24], ignore_index=True)

In [10]:
# Discard the 'Unnamed: 0' column; every other column is kept as is.
friends = friends.drop(columns=['Unnamed: 0'])

In [11]:
friends

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season
0,Monica,1.0,Monica Gets A Roommate,There's nothing to tell! He's just some guy I ...,0.0,1.0
1,Joey,1.0,Monica Gets A Roommate,"C'mon, you're going out with the guy! There's ...",1.0,1.0
2,Chandler,1.0,Monica Gets A Roommate,"All right Joey, be nice. So does he have a hum...",2.0,1.0
3,Phoebe,1.0,Monica Gets A Roommate,"Wait, does he eat chalk?",3.0,1.0
4,Phoebe,1.0,Monica Gets A Roommate,"Just, 'cause, I don't want her to go through w...",4.0,1.0
...,...,...,...,...,...,...
60855,Minster,24.0,The One With Ross Wedding,"Now Ross, repeat after me. I Ross...",224.0,4.0
60856,Ross,24.0,The One With Ross Wedding,I Ross...,225.0,4.0
60857,Minister,24.0,The One With Ross Wedding,"Take thee, Emily...",226.0,4.0
60858,Ross,24.0,The One With Ross Wedding,"Take thee, Rachel...(All his friends have loo...",227.0,4.0


### Normalizing author names so that each main character appears under a single spelling

In [12]:
# Map every alternate spelling of a main character's name onto one canonical
# form. The result is assigned back to the column instead of calling
# replace(..., inplace=True) on friends['author']: that call mutates an
# intermediate Series and, with pandas Copy-on-Write, leaves `friends` unchanged.
friends['author'] = friends['author'].replace({'CHAN':'Chandler','CHANDLER':'Chandler', 'Chandlers':'Chandler', 'chandler':'Chandler',
                    'JOEY':'Joey',
                    'MNCA':'Monica','MONICA':'Monica',
                    'PHOE':'Phoebe','PHOEBE':'Phoebe', 'Pheebs':'Phoebe',
                    'Rache':'Rachel','RACHEL':'Rachel', 'RACH':'Rachel',
                    'ROSS':'Ross'})

In [13]:
friends.author.unique().shape

(1003,)

### Creating a function that combines each main character's quotes within a season into one document per episode

In [14]:
def create_table_season(data, season_length, season_number):
    """Build one row per main character per episode for a single season.

    All quotes a main character speaks in an episode are joined, in the order
    the rows appear in ``data``, into one space-separated string.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``author``, ``quote``, ``season`` and
        ``episode_number``.
    season_length : int
        Highest episode number to include. Episodes ``1..season_length`` are
        considered; an episode/character pair with no rows is simply absent
        from the result.
    season_number : int
        Value of ``season`` to select.

    Returns
    -------
    pandas.DataFrame
        Columns ``author``, ``quote``, ``season``, ``episode_number`` with a
        fresh ``0..n-1`` index. Rows are ordered by episode number and, within
        an episode, by the order of ``friends_list``. An empty frame with the
        same columns is returned when nothing matches.
    """
    column_names = ['author', 'quote', 'season', 'episode_number']
    friends_list = ['Monica', 'Joey', 'Rachel', 'Ross', 'Phoebe', 'Chandler']

    # Select every relevant row in a single pass instead of re-filtering the
    # whole table once per (episode, character) pair.
    wanted = (
        (data['season'] == season_number)
        & data['episode_number'].isin(list(range(1, season_length + 1)))
        & data['author'].isin(friends_list)
        # ' '.join raises TypeError on non-string items such as NaN, so rows
        # without a quote are left out.
        & data['quote'].notna()
    )
    lines = data.loc[wanted, column_names]
    if lines.empty:
        return pd.DataFrame(columns=column_names)

    # One group per (episode, character); building the result in one step
    # avoids growing a frame with pd.concat inside a loop.
    combined = (
        lines.groupby(['episode_number', 'author'], sort=False)
        .agg({'quote': ' '.join, 'season': 'first'})
        .reset_index()
    )

    # Order rows by episode, then by the character order of friends_list.
    friend_rank = {name: position for position, name in enumerate(friends_list)}
    combined = combined.assign(_friend_rank=combined['author'].map(friend_rank))
    combined = combined.sort_values(['episode_number', '_friend_rank'])
    return combined[column_names].reset_index(drop=True)
        

#### The next cells build one dataframe per season, ten in total

In [15]:
season_1 = create_table_season(friends, 24, 1)

In [16]:
season_1

Unnamed: 0,author,quote,season,episode_number
0,Monica,There's nothing to tell! He's just some guy I ...,1.0,1.0
1,Joey,"C'mon, you're going out with the guy! There's ...",1.0,1.0
2,Rachel,Oh God Monica hi! Thank God! I just went to yo...,1.0,1.0
3,Ross,(mortified) Hi. I just feel like someone reach...,1.0,1.0
4,Phoebe,"Wait, does he eat chalk? Just, 'cause, I don't...",1.0,1.0
...,...,...,...,...
139,Joey,"Hey, Chan, can you help me out here? I promise...",1.0,24.0
140,Rachel,"Oh, let me see! (grabs picture) Oh, God, is he...",1.0,24.0
141,Ross,And here's little Ben nodding off... (quietly)...,1.0,24.0
142,Phoebe,"Oh, look, he's got Ross's haircut! Wow, ooh, y...",1.0,24.0


In [17]:
# Moves the current row index into a new 'index' column and renumbers the rows
# from 0; the following cell drops that 'index' column again.
season_1.reset_index(inplace=True)

In [18]:
# Removes the 'index' column added by the preceding reset_index() cell.
season_1.drop('index', axis=1, inplace=True)

In [21]:
season_1.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.])

In [22]:
season_2 = create_table_season(friends, 24, 2)

In [23]:
season_2

Unnamed: 0,author,quote,season,episode_number
0,Monica,"I'm telling you, she went to the airport, and ...",2.0,1.0
1,Joey,"Um, this? What up?! (answering the phone) Hell...",2.0,1.0
2,Rachel,Oh my god. Oh my god. (She decides to make a b...,2.0,1.0
3,Ross,"(seeing her) Rach! It was, it was great. Oh, w...",2.0,1.0
4,Phoebe,"Ok, so this is pretty much what's happened so ...",2.0,1.0
...,...,...,...,...
139,Joey,Incredible! I met the director this time and y...,2.0,24.0
140,Rachel,"Hey Joey, how'd the audition go? Yeah, right. ...",2.0,24.0
141,Ross,"Well, hey. You're an actor, I say you just suc...",2.0,24.0
142,Phoebe,"Well, come on, who cares what that guy thinks....",2.0,24.0


In [24]:
season_2.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.])

In [25]:
season_3 = create_table_season(friends, 25, 3)

In [26]:
season_3

Unnamed: 0,author,quote,season,episode_number
0,Monica,"God, look what I found in the drain. It's some...",3.0,1.0
1,Joey,I'm tellin' ya that girl totally winked at me....,3.0,1.0
2,Rachel,Nooo Way! The most romantic song ever is The W...,3.0,1.0
3,Ross,I have to say Tupolo Honey by Van Morrison. He...,3.0,1.0
4,Phoebe,"See, I-I think that one that Elton John wrote ...",3.0,1.0
...,...,...,...,...
145,Joey,"Yeah, sure. Well y'know, earlier she was talki...",3.0,25.0
146,Rachel,(to Monica) I mean is that woman capable of ta...,3.0,25.0
147,Ross,"Thats too bad. Yeah? What about ah, that bike...",3.0,25.0
148,Phoebe,"(entering, hurridly) Hey, you guys! Look what ...",3.0,25.0


In [27]:
season_3.episode_number.unique().shape

(25,)

In [28]:
season_4 = create_table_season(friends, 24, 4)

In [29]:
season_4

Unnamed: 0,author,quote,season,episode_number
0,Monica,(closing the fridge in disgust) Shoot! Were o...,4.0,1.0
1,Joey,"(lying on a beach towel, recapping what happen...",4.0,1.0
2,Rachel,"Oh, youre welcome a million. (softly) Oh my G...",4.0,1.0
3,Ross,"(surprised) Hi! Awww. Yeah. (closes the door, ...",4.0,1.0
4,Phoebe,"Ehh? I-I mean I, well I think I can figure it ...",4.0,1.0
...,...,...,...,...
139,Joey,What’s in it? (Looking up at the waiter)That...,4.0,24.0
140,Rachel,"(Running to the ticket counter) Ooh, ooh, ooh...",4.0,24.0
141,Ross,(Ross hugs his mom and dad)Hi. Mom. Dad. wha...,4.0,24.0
142,Phoebe,Oh...yes..is this..umm..Emily’s Parents’ hous...,4.0,24.0


In [30]:
season_4.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.])

In [31]:
season_5 = create_table_season(friends, 24, 5)

In [32]:
season_5

Unnamed: 0,author,quote,season,episode_number
0,Monica,Hey. I know. How could we have let this happen...,5.0,1.0
1,Joey,"Well, that went well. Yeah. (To the gang) Hey,...",5.0,1.0
2,Rachel,(To the woman sitting in front of her) He-he s...,5.0,1.0
3,Ross,"I Ross... Take thee, Rachel...(All his friends...",5.0,1.0
4,Phoebe,"(On the phone, in New York) Uh, hello, this is...",5.0,1.0
...,...,...,...,...
139,Joey,(entering) Hey-hey-hey you made it! All right!...,5.0,24.0
140,Rachel,Ahh. Hello! (She makes a face and the kid laug...,5.0,24.0
141,Ross,I think the check in is that way. (Points) Pho...,5.0,24.0
142,Phoebe,(sees Ross and Rachel) Hey! You guys are here!...,5.0,24.0


In [33]:
season_5.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.])

In [34]:
season_6 = create_table_season(friends, 25, 6)

In [35]:
season_6

Unnamed: 0,author,quote,season,episode_number
0,Monica,Whoa! What are you guys doing here? Why else w...,6.0,1.0
1,Joey,(entering with Phoebe) Come on Pheebs! Hurry! ...,6.0,1.0
2,Rachel,Ohhh! (Looks in the mirror and sees that she s...,6.0,1.0
3,Ross,Why are we in bed together? (checks) Yeah. No!...,6.0,1.0
4,Phoebe,Okay! Okay! Okay! (They run into the chapel.) ...,6.0,1.0
...,...,...,...,...
145,Joey,"Hey uh, have you guys scene Chandler? Hey Moni...",6.0,25.0
146,Rachel,"Isnt it incredible?! Monica and Chandler, get...",6.0,25.0
147,Ross,"What happened you guys? Rach! Hey, come on in....",6.0,25.0
148,Phoebe,"I know, theyre gonna be so happy together. No...",6.0,25.0


In [36]:
season_6.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.])

In [37]:
season_7 = create_table_season(friends, 24, 7)

In [38]:
season_7

Unnamed: 0,author,quote,season,episode_number
0,Monica,(yelling at the top of her lungs) Im engaged!...,7.0,1.0
1,Joey,(grabbing the candy bar) Yeah Ill take that. ...,7.0,1.0
2,Rachel,"Yeah Pheebs, honey, she just got engaged a cou...",7.0,1.0
3,Ross,"Hey, whats going on? I found a note on my doo...",7.0,1.0
4,Phoebe,"Oh no, let her stay out there. Its sweet. So ...",7.0,1.0
...,...,...,...,...
139,Joey,"Excuse me, Aaron? (The director turns around.)...",7.0,24.0
140,Rachel,"Oh my God! Oh my God! No, she had to have just...",7.0,24.0
141,Ross,"No! I talked to Joey on the set, he hasnt hea...",7.0,24.0
142,Phoebe,"I know! Monicas gonna have a baby! Hey, can t...",7.0,24.0


In [39]:
season_7.episode_number.unique().shape

(24,)

In [40]:
season_8 = create_table_season(friends, 24, 8)

In [41]:
season_8

Unnamed: 0,author,quote,season,episode_number
0,Monica,Okay. Guys! Im not pregnant. You didnt tell ...,8.0,1.0
1,Joey,What? Whats going on? Oh my God! Is that why ...,8.0,1.0
2,Rachel,"Oh my God, who is it?! (Phoebe rolls her eyes....",8.0,1.0
3,Ross,"(To Monica) Okay, I know Im not supposed to k...",8.0,1.0
4,Phoebe,Okay. (Phoebe and Rachel join Monica and Chand...,8.0,1.0
...,...,...,...,...
139,Joey,"Uh, well hes 33. A widower. He seemed like a ...",8.0,24.0
140,Rachel,Oh! I get it! Oh we-we didnt. Just tell me ho...,8.0,24.0
141,Ross,And yet somehow its true! (To Rachel) Squeeze...,8.0,24.0
142,Phoebe,What else? What else? Oh. Ah-uh. Oh. Enter Phe...,8.0,24.0


In [42]:
season_8.episode_number.unique().shape

(24,)

In [43]:
season_9 = create_table_season(friends, 23, 9)

In [44]:
season_9

Unnamed: 0,author,quote,season,episode_number
0,Monica,(entering with everyone else including Mr. Gel...,9.0,1.0
1,Joey,Uh Look Rach(Ross enters.) Hey Ross is here!...,9.0,1.0
2,Rachel,Joey. (seeing the ring) Oh my God. (Pause) Oka...,9.0,1.0
3,Ross,Hey Joey. (To Rachel) Hey you. Youre weird to...,9.0,1.0
4,Phoebe,(To Monica) Oh and I need to talk to you. To s...,9.0,1.0
...,...,...,...,...
133,Joey,(entering) Hey! I'm all packed and ready to go...,9.0,23.0
134,Rachel,Do you have anything that would... get us out ...,9.0,23.0
135,Ross,"Oh, that's right! (to Emma) Daddy and uncle Jo...",9.0,23.0
136,Phoebe,I think David would probably wanna hear a few ...,9.0,23.0


In [45]:
season_9.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23.])

In [46]:
# season_length is 18, but the data only holds season-10 episodes 1-17 (the
# finale "The Last One, Part I & II" is stored as episode 17), so episode 18
# contributes no rows.
season_10 = create_table_season(friends, 18, 10)

In [47]:
season_10

Unnamed: 0,author,quote,season,episode_number
0,Monica,"Oh, the way you crushed Mike at ping pong was ...",10.0,1.0
1,Joey,Ooh... I can't believe I'm kissing you. I'm ki...,10.0,1.0
2,Rachel,"(through wall) Ooh... I love Barbados! I know,...",10.0,1.0
3,Ross,"... finally... God, you're amazing... I didn't...",10.0,1.0
4,Phoebe,"Hey! Well, okay, Mike's taking a shower, which...",10.0,1.0
...,...,...,...,...
97,Joey,Hey! It's my house-warming present for Monica ...,10.0,17.0
98,Rachel,So if you think I didn't say goodbye to you be...,10.0,17.0
99,Ross,"Rach! Hey. Oh. This was amazing. Yeah, well, t...",10.0,17.0
100,Phoebe,Morning. What's that? It's a baby chick and du...,10.0,17.0


In [48]:
season_10.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17.])

### Creating a function to concatenate all the season dataframes into one large dataframe

In [50]:
# Per-season frames in broadcast order; this order becomes the row order of
# the combined table.
seasons = [
    season_1,
    season_2,
    season_3,
    season_4,
    season_5,
    season_6,
    season_7,
    season_8,
    season_9,
    season_10,
]

In [51]:
def combine_seasons(list_of_data):
    """Stack the per-season dataframes into a single dataframe.

    Parameters
    ----------
    list_of_data : iterable of pandas.DataFrame
        Season frames; their rows are stacked in the order given.

    Returns
    -------
    pandas.DataFrame
        All rows with a fresh ``0..n-1`` index. The columns ``author``,
        ``quote``, ``season`` and ``episode_number`` always come first, in
        that order; any other columns found in the inputs follow. An empty
        frame with those four columns is returned for empty input.
    """
    base_columns = ['author', 'quote', 'season', 'episode_number']
    frames = list(list_of_data)
    if not frames:
        # pd.concat raises ValueError when given nothing to concatenate.
        return pd.DataFrame(columns=base_columns)

    # A single concat replaces growing a frame inside a loop and avoids
    # seeding the result with an empty object-dtype frame.
    combined = pd.concat(frames, ignore_index=True)
    extra_columns = [name for name in combined.columns if name not in base_columns]
    return combined.reindex(columns=base_columns + extra_columns)

In [52]:
friends_data = combine_seasons(seasons)

In [53]:
friends_data

Unnamed: 0,author,quote,season,episode_number
0,Monica,There's nothing to tell! He's just some guy I ...,1.0,1.0
1,Joey,"C'mon, you're going out with the guy! There's ...",1.0,1.0
2,Rachel,Oh God Monica hi! Thank God! I just went to yo...,1.0,1.0
3,Ross,(mortified) Hi. I just feel like someone reach...,1.0,1.0
4,Phoebe,"Wait, does he eat chalk? Just, 'cause, I don't...",1.0,1.0
...,...,...,...,...
1399,Joey,Hey! It's my house-warming present for Monica ...,10.0,17.0
1400,Rachel,So if you think I didn't say goodbye to you be...,10.0,17.0
1401,Ross,"Rach! Hey. Oh. This was amazing. Yeah, well, t...",10.0,17.0
1402,Phoebe,Morning. What's that? It's a baby chick and du...,10.0,17.0


In [54]:
# Write the combined table to disk. With to_csv's default index=True the row
# index is saved as an unnamed first column, which pandas shows as
# 'Unnamed: 0' when the file is read back without index_col=0.
friends_data.to_csv('Full_Friends_Lines.csv')

#### The next few cells were used to spot-check individual seasons for mistakes while building the season dataframes

In [74]:
# Spot check: Monica's lines in season 1, episode 1. The name is compared
# case-insensitively so the check also finds her rows when 'author' holds the
# capitalised spelling 'Monica' produced by the name-normalisation cell.
friends[(friends['author'].str.lower() == 'monica') & (friends['season'] == 1.0) & (friends['episode_number'] == 1)]

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season
0,monica,1.0,Monica Gets A Roommate,There's nothing to tell! He's just some guy I ...,0.0,1.0
5,monica,1.0,Monica Gets A Roommate,"Okay, everybody relax. This is not even a date...",5.0,1.0
15,monica,1.0,Monica Gets A Roommate,And they weren't looking at you before?!,15.0,1.0
19,monica,1.0,Monica Gets A Roommate,"Are you okay, sweetie?",19.0,1.0
22,monica,1.0,Monica Gets A Roommate,(explaining to the others) Carol moved her stu...,22.0,1.0
...,...,...,...,...,...,...
263,monica,1.0,Monica Gets A Roommate,That's Paul's watch. You just put it back wher...,265.0,1.0
280,monica,1.0,Monica Gets A Roommate,"See ya.... Waitwait, what's with you?",282.0,1.0
284,monica,1.0,Monica Gets A Roommate,What? I-I said you had a-,287.0,1.0
286,monica,1.0,Monica Gets A Roommate,(to Phoebe) Would you stop?,289.0,1.0


In [290]:
# Spot check: the last 40 rows of season 10, episode 17.
friends.loc[
    friends['season'].eq(10) & friends['episode_number'].eq(17)
].tail(40)

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season
60251,Rachel,17.0,"The Last One, Part I & II",(on the answering machine) Excuse me?,546.0,10.0
60252,Air stewardess,17.0,"The Last One, Part I & II","(on the answering machine) Miss? Please, sit d...",547.0,10.0
60253,Rachel,17.0,"The Last One, Part I & II",(on the answering machine) I'm sorry. I'm real...,548.0,10.0
60254,Air stewardess,17.0,"The Last One, Part I & II","(on the answering machine) Miss, I can't let y...",549.0,10.0
60255,Ross,17.0,"The Last One, Part I & II",Let her off the plane!,550.0,10.0
60256,Air stewardess,17.0,"The Last One, Part I & II",(on the answering machine) I am afraid you are...,551.0,10.0
60257,Rachel,17.0,"The Last One, Part I & II","(on the answering machine) Oh, please, miss, y...",552.0,10.0
60258,Ross,17.0,"The Last One, Part I & II",Try to understand!,553.0,10.0
60259,Rachel,17.0,"The Last One, Part I & II","(on the answering machine) Oh, come on, miss, ...",554.0,10.0
60260,Ross,17.0,"The Last One, Part I & II",No! No! Oh my God. Did she get off the plane? ...,555.0,10.0
