# Creating the `Full_Friends_Transcript` CSV  
#### This CSV has each of the episodes of the show as its own row

### Imports

In [34]:
import pandas as pd
import numpy as np
import nltk
from sklearn.feature_extraction.text import CountVectorizer

# Text preprocessing steps - remove numbers, capital letters and punctuation
import re
import string

### Reading In Friends Data

In [35]:
friends = pd.read_csv('friends_quotes.csv')

In [36]:
s1e16 = pd.read_csv('s1e16.csv')

In [37]:
s4e24 = pd.read_csv('s4e24.csv')

In [38]:
s9e11 = pd.read_csv('s9e11.csv')

In [39]:
s9e13 = pd.read_csv('s9e13.csv')

In [40]:
# Append the four separately loaded episode files (s1e16, s9e11, s9e13, s4e24)
# to the main quotes table; ignore_index=True renumbers the rows 0..n-1.
friends = pd.concat([friends, s1e16, s9e11, s9e13, s4e24], ignore_index=True)

In [41]:
# Drop the 'Unnamed: 0' column (presumably an index that was saved into the CSVs).
# Reassigning instead of using inplace=True, together with errors='ignore', keeps
# this cell idempotent: re-running it after the column is gone no longer raises KeyError.
friends = friends.drop(columns='Unnamed: 0', errors='ignore')

In [42]:
friends

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season
0,Monica,1.0,Monica Gets A Roommate,There's nothing to tell! He's just some guy I ...,0.0,1.0
1,Joey,1.0,Monica Gets A Roommate,"C'mon, you're going out with the guy! There's ...",1.0,1.0
2,Chandler,1.0,Monica Gets A Roommate,"All right Joey, be nice. So does he have a hum...",2.0,1.0
3,Phoebe,1.0,Monica Gets A Roommate,"Wait, does he eat chalk?",3.0,1.0
4,Phoebe,1.0,Monica Gets A Roommate,"Just, 'cause, I don't want her to go through w...",4.0,1.0
...,...,...,...,...,...,...
60855,Minster,24.0,The One With Ross Wedding,"Now Ross, repeat after me. I Ross...",224.0,4.0
60856,Ross,24.0,The One With Ross Wedding,I Ross...,225.0,4.0
60857,Minister,24.0,The One With Ross Wedding,"Take thee, Emily...",226.0,4.0
60858,Ross,24.0,The One With Ross Wedding,"Take thee, Rachel...(All his friends have loo...",227.0,4.0


### Normalizing author names so that each main character has a single, consistent name

In [43]:
# Map the spelling/casing variants of the six main characters onto one canonical
# name each.
author_aliases = {
    'CHAN': 'Chandler', 'CHANDLER': 'Chandler', 'Chandlers': 'Chandler', 'chandler': 'Chandler',
    'JOEY': 'Joey',
    'MNCA': 'Monica', 'MONICA': 'Monica',
    'PHOE': 'Phoebe', 'PHOEBE': 'Phoebe', 'Pheebs': 'Phoebe',
    'Rache': 'Rachel', 'RACHEL': 'Rachel', 'RACH': 'Rachel',
    'ROSS': 'Ross',
}

# Assign the result back to the column. Calling `.replace(..., inplace=True)` on
# `friends['author']` is a chained in-place update: pandas deprecates it, and with
# Copy-on-Write enabled it operates on a temporary and leaves `friends` unchanged.
friends['author'] = friends['author'].replace(author_aliases)

In [44]:
friends.author.unique().shape

(1003,)

### Creating a function that joins all quotes of each episode in a season into a single document per episode

In [45]:
def create_episode_transcript(data, season_length, season_number):
    """Collapse one season's quotes into a single transcript row per episode.

    Parameters
    ----------
    data : pandas.DataFrame
        Quote-level table with at least the columns ``quote``, ``season`` and
        ``episode_number``.
    season_length : int
        Number of episodes in the season; only episodes numbered
        ``1..season_length`` are kept.
    season_number : int or float
        Season to extract.

    Returns
    -------
    pandas.DataFrame
        Columns ``quote``, ``season``, ``episode_number`` (in that order), one
        row per episode found, ordered by episode number, with a fresh 0..n-1
        index. ``quote`` holds the episode's quotes joined by single spaces in
        their original row order. If nothing matches, an empty frame with the
        same columns is returned.
    """
    column_names = ['quote', 'season', 'episode_number']

    # Select the whole season in one pass instead of filtering once per episode.
    in_season = data['season'] == season_number
    in_range = data['episode_number'].isin(list(range(1, season_length + 1)))
    selected = data.loc[in_season & in_range, column_names]
    if selected.empty:
        return pd.DataFrame(columns=column_names)

    def join_quotes(quotes):
        # Skip missing quotes: ' '.join would raise TypeError on a float NaN.
        return ' '.join(quotes.dropna().astype(str))

    # A single groupby replaces the per-episode concat loop; groupby sorts the
    # episode keys and keeps the row order inside each episode.
    transcripts = (
        selected
        .groupby('episode_number')
        .agg({'quote': join_quotes, 'season': 'first'})
        .reset_index()
    )
    return transcripts[column_names]
        

#### The next cells build one dataframe per season (ten in total)

In [46]:
season_1 = create_episode_transcript(friends, 24, 1)

In [47]:
season_1

Unnamed: 0,quote,season,episode_number
0,There's nothing to tell! He's just some guy I ...,1.0,1.0
1,"What you guys don't understand is, for us, kis...",1.0,2.0
2,"(entering) Hi guys! Hey, Pheebs! Hi! Hey. Oh, ...",1.0,3.0
3,"Alright. Phoebe? Okay, okay. If I were omnipot...",1.0,4.0
4,Would you let it go? It's not that big a deal....,1.0,5.0
5,(reading the program) Ooh! Look! Look! Look! L...,1.0,6.0
6,"Everybody? Shh, shhh. Uhhh... Central Perk is ...",1.0,7.0
7,Dehydrated Japanese noodles under fluorescent ...,1.0,8.0
8,"Terry, I, I, I know that I haven't worked here...",1.0,9.0
9,Guys? There's a somebody I'd like you to meet....,1.0,10.0


In [48]:
season_1.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.])

In [50]:
season_2 = create_episode_transcript(friends, 24, 2)

In [51]:
season_2.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.])

In [52]:
season_3 = create_episode_transcript(friends, 25, 3)

In [53]:
season_3

Unnamed: 0,quote,season,episode_number
0,I'm tellin' ya that girl totally winked at me....,3.0,1.0
1,Ira Ungerleider Transcribed by: Eric Aasen All...,3.0,2.0
2,"Chandler and Joey's, Chandler is sitting readi...",3.0,3.0
3,"Welcome everybody, welcome to Amazing Discover...",3.0,4.0
4,(entering) Hey! Hey! Hey-hey-hey. So what happ...,3.0,5.0
5,"Rachel has two friends that are not named, so ...",3.0,6.0
6,"So I told Carl, Nobody, no matter how famous ...",3.0,7.0
7,Here you go Pheebs. Who else wants one of my s...,3.0,8.0
8,(reacting to a play) Yes! Yes! Yes! Yes! Awww!...,3.0,9.0
9,"(reading the comics) Eh..., I dont, I dont k...",3.0,10.0


In [54]:
season_4 = create_episode_transcript(friends, 24, 4)

In [55]:
season_4

Unnamed: 0,quote,season,episode_number
0,"(lying on a beach towel, recapping what happen...",4.0,1.0
1,Wow! That ripped! That ripped real nice! How m...,4.0,2.0
2,Hey!! We are so in luck! Treeger said that we ...,4.0,3.0
3,"Hey! New wallet, huh? Yeah, it was time. The o...",4.0,4.0
4,(picking up the slip of paper) Hello! What's t...,4.0,5.0
5,Scott Silveri & Shana Goldberg-Meehan Transcri...,4.0,6.0
6,(entering in a bathrobe) I just walked in the ...,4.0,7.0
7,"(answering phone) Hello. (on phone) Hey, its ...",4.0,8.0
8,"Okay! Come on, no peeking! (They are leading t...",4.0,9.0
9,"Hey! Hey! Im sorry Im late, did I miss anyth...",4.0,10.0


In [56]:
season_5 = create_episode_transcript(friends, 24, 5)

In [57]:
season_5

Unnamed: 0,quote,season,episode_number
0,Friends. Family. We are gathered to celebrate ...,5.0,1.0
1,"You look cute in bubbles. Ehh, you're just liq...",5.0,2.0
2,"(to the nurse) Hi. Hi. Hi, yeah, hi! I'm umm, ...",5.0,3.0
3,(In a sexy voice) Come in. I've been waiting f...,5.0,4.0
4,(quietly) Hi! (quietly) Hi! (They both start k...,5.0,5.0
5,"(entering) Hey! Hey! Hey! Hey!! None of that, ...",5.0,6.0
6,"Are you looking at naked tribe's women? No, lo...",5.0,7.0
7,Oh Monica that was the best Thanksgiving dinne...,5.0,8.0
8,What am I sitting on? Top of the world? Dock o...,5.0,9.0
9,"Shes a woman! Oh, thanks! Yeah! Wow! Is that ...",5.0,10.0


In [58]:
season_6 = create_episode_transcript(friends, 25, 6)

In [59]:
season_6

Unnamed: 0,quote,season,episode_number
0,Whoa! Oh my God! (entering with Phoebe) Come o...,6.0,1.0
1,"Hey! Hey, so did everything go okay with the a...",6.0,2.0
2,(entering) Hey! Hey! Check it out! This is unb...,6.0,3.0
3,"So, which of this kitchen stuff is mine? This ...",6.0,4.0
4,I cannot believe that you didnt tell me that ...,6.0,5.0
5,(watches Joey for a moment) Okay! (Joey quickl...,6.0,6.0
6,Ready? Yeah. Okay. Hi... it's... Rachel... and...,6.0,7.0
7,Perry Rein and Gigi McCreey Transcribed by: Aa...,6.0,8.0
8,"(on phone) Okay, great! Bye. (Hangs up as Chan...",6.0,9.0
9,"Let me ask you, why is everybody using these t...",6.0,10.0


In [60]:
season_7 = create_episode_transcript(friends, 24, 7)

In [61]:
season_7

Unnamed: 0,quote,season,episode_number
0,David Crane & Marta Kauffman Story by: Wil Cal...,7.0,1.0
1,"Hey! Hi! Who wants French toast? Oh, Ill have...",7.0,2.0
2,"Hey, you guys! So, what do you think? About wh...",7.0,3.0
3,"(on TV) Well, if we learned one thing today C....",7.0,4.0
4,Patty Lin Story by: Earl Davis Transcribed by:...,7.0,5.0
5,"Okay, the reason why I asked you guys out to b...",7.0,6.0
6,"Morning! Hi! Oh, how was your date last night?...",7.0,7.0
7,(entering) Hey everybody! Happy Thanksgiving! ...,7.0,8.0
8,(on the bike) Im ready! You sure? Uh-huh! Oka...,7.0,9.0
9,Buh-bye. (Hangs up the phone) I just got us re...,7.0,10.0


In [62]:
season_8 = create_episode_transcript(friends, 24, 8)

In [63]:
season_8

Unnamed: 0,quote,season,episode_number
0,Great. (Takes a picture.) Great! Just give me ...,8.0,1.0
1,"Listen yknow what sir? For the last time, I d...",8.0,2.0
2,Hey! Babe! Arent you excited were going on o...,8.0,3.0
3,"Wait. Before we go in, I just want you to know...",8.0,4.0
4,"Okay, now this one is rare, this one is medium...",8.0,5.0
5,"Hey you guys? What? I know its last minute, b...",8.0,6.0
6,Hey. Hey. Oh my God! You cleaned! (Gasps) Look...,8.0,7.0
7,(coming from the bathroom) Hey Pheebs? Huh? I...,8.0,8.0
8,"Hey Rach listen, did you know that during preg...",8.0,9.0
9,Brian Buckner & Sebastian Jones Story by: Robe...,8.0,10.0


In [64]:
season_9 = create_episode_transcript(friends, 23, 9)

In [65]:
season_9

Unnamed: 0,quote,season,episode_number
0,Joey. (seeing the ring) Oh my God. (Pause) Oka...,9.0,1.0
1,You said youd marry Joey? Okay you have to re...,9.0,2.0
2,Ane B. J. Hi! Hey! So what's the big news you ...,9.0,3.0
3,Oh hey Ross oh I'm so glad someone's here coul...,9.0,4.0
4,"Oh hey you guys, I couldnt get a reservation f...",9.0,5.0
5,"Dedicated to the great work of Eric Aasen, Gui...",9.0,6.0
6,"Dedicated to the great work of Eric Aasen, Gui...",9.0,7.0
7,"Hey Hon, could you help me get the plates down...",9.0,8.0
8,This kitty is Mittens and this one is Fitzhugh...,9.0,9.0
9,"...sprang to his sleigh, to his team gave a wh...",9.0,10.0


In [66]:
season_9.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23.])

In [67]:
season_10 = create_episode_transcript(friends, 17, 10)

In [68]:
season_10

Unnamed: 0,quote,season,episode_number
0,"Oh, the way you crushed Mike at ping pong was ...",10.0,1.0
1,"Okay, Ross, I realise that you didn't expect t...",10.0,2.0
2,"So, you and Rachel tonight, huh? Yeah. It's ac...",10.0,3.0
3,"(she enters) Hey guys! Hey! Honey, I got us th...",10.0,4.0
4,"You know, I'm thinking about letting Emma have...",10.0,5.0
5,"(she enters) Hey... Hey! Hi! Pheebs, what's wr...",10.0,6.0
6,"Hey you guys! Hey! Hey, what are you doing? Oh...",10.0,7.0
7,Hey guys! Hi! Hey! We need to talk to you abou...,10.0,8.0
8,"Hi! Hey! Hey! We're just here to say goodbye, ...",10.0,9.0
9,(really excited) Mmh... this cake is amazing! ...,10.0,10.0


In [69]:
# NOTE(review): season_9 was already inspected in an earlier cell, and this cell
# directly follows the season_10 build — possibly meant to check season_10; confirm.
season_9.episode_number.unique()

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23.])

## Creating a function to concatenate all the season dataframes into one large dataframe

In [70]:
seasons = [season_1, season_2, season_3, season_4, season_5, season_6, season_7, season_8, season_9, season_10]

In [71]:
def combine_seasons(list_of_data):
    """Stack the per-season episode tables into one DataFrame.

    Parameters
    ----------
    list_of_data : iterable of pandas.DataFrame
        Season-level frames, expected to carry the columns ``quote``,
        ``season`` and ``episode_number``.

    Returns
    -------
    pandas.DataFrame
        All rows of the input frames in the order given, with a fresh 0..n-1
        index. ``quote``, ``season``, ``episode_number`` come first; any other
        columns present in the inputs follow. An empty input yields an empty
        frame with those three columns.
    """
    column_names = ['quote', 'season', 'episode_number']
    frames = list(list_of_data)
    if not frames:
        # pd.concat raises on an empty list; return the empty template instead.
        return pd.DataFrame(columns=column_names)

    # One concat call instead of re-copying the accumulated frame per season.
    combined = pd.concat(frames, ignore_index=True)
    extras = [col for col in combined.columns if col not in column_names]
    # reindex pins the column order and adds any missing expected column as NaN.
    return combined.reindex(columns=column_names + extras)

In [72]:
friends_data = combine_seasons(seasons)

In [73]:
friends_data

Unnamed: 0,quote,season,episode_number
0,There's nothing to tell! He's just some guy I ...,1.0,1.0
1,"What you guys don't understand is, for us, kis...",1.0,2.0
2,"(entering) Hi guys! Hey, Pheebs! Hi! Hey. Oh, ...",1.0,3.0
3,"Alright. Phoebe? Okay, okay. If I were omnipot...",1.0,4.0
4,Would you let it go? It's not that big a deal....,1.0,5.0
...,...,...,...
229,"Hi Hey! Hi! How was the honeymoon? Phoebe; Oh,...",10.0,13.0
230,(raising his glass) Thank you guys for having ...,10.0,14.0
231,Previously on Friends... How did the job stuff...,10.0,15.0
232,"All right, all right, all right, let's play on...",10.0,16.0


In [74]:
# Write the combined per-episode table to disk. The default row index is written
# as well, so reading this file back yields an extra 'Unnamed: 0' column.
# NOTE(review): consider index=False if no downstream reader depends on that column.
friends_data.to_csv('Full_Friends_Transcript.csv')

#### The next few cells were used to inspect individual episodes and check for mistakes while building the season dataframes

In [74]:
# Spot-check: rows authored by 'monica' in season 1, episode 1.
# NOTE(review): this filters on lowercase 'monica', whereas the name-normalization
# cell maps variants to 'Monica'; the recorded output suggests it was run against
# a different kernel state — confirm it still matches after Restart & Run All.
friends[(friends['author'] == 'monica') & (friends['season'] == 1.0) & (friends['episode_number'] == 1)]

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season
0,monica,1.0,Monica Gets A Roommate,There's nothing to tell! He's just some guy I ...,0.0,1.0
5,monica,1.0,Monica Gets A Roommate,"Okay, everybody relax. This is not even a date...",5.0,1.0
15,monica,1.0,Monica Gets A Roommate,And they weren't looking at you before?!,15.0,1.0
19,monica,1.0,Monica Gets A Roommate,"Are you okay, sweetie?",19.0,1.0
22,monica,1.0,Monica Gets A Roommate,(explaining to the others) Carol moved her stu...,22.0,1.0
...,...,...,...,...,...,...
263,monica,1.0,Monica Gets A Roommate,That's Paul's watch. You just put it back wher...,265.0,1.0
280,monica,1.0,Monica Gets A Roommate,"See ya.... Waitwait, what's with you?",282.0,1.0
284,monica,1.0,Monica Gets A Roommate,What? I-I said you had a-,287.0,1.0
286,monica,1.0,Monica Gets A Roommate,(to Phoebe) Would you stop?,289.0,1.0


In [290]:
friends[(friends['episode_number'] == 17)& (friends['season'] == 10)].tail(40)

Unnamed: 0,author,episode_number,episode_title,quote,quote_order,season
60251,Rachel,17.0,"The Last One, Part I & II",(on the answering machine) Excuse me?,546.0,10.0
60252,Air stewardess,17.0,"The Last One, Part I & II","(on the answering machine) Miss? Please, sit d...",547.0,10.0
60253,Rachel,17.0,"The Last One, Part I & II",(on the answering machine) I'm sorry. I'm real...,548.0,10.0
60254,Air stewardess,17.0,"The Last One, Part I & II","(on the answering machine) Miss, I can't let y...",549.0,10.0
60255,Ross,17.0,"The Last One, Part I & II",Let her off the plane!,550.0,10.0
60256,Air stewardess,17.0,"The Last One, Part I & II",(on the answering machine) I am afraid you are...,551.0,10.0
60257,Rachel,17.0,"The Last One, Part I & II","(on the answering machine) Oh, please, miss, y...",552.0,10.0
60258,Ross,17.0,"The Last One, Part I & II",Try to understand!,553.0,10.0
60259,Rachel,17.0,"The Last One, Part I & II","(on the answering machine) Oh, come on, miss, ...",554.0,10.0
60260,Ross,17.0,"The Last One, Part I & II",No! No! Oh my God. Did she get off the plane? ...,555.0,10.0
