### Import Libraries

In [None]:
import glob
import pandas as pd
import pickle
import numpy as np

### Import Data Files and Create Master Data Set

In [98]:
# Create list of all data files
myfiles = []
for each_file in glob.glob('scripts/*.csv'):
    myfiles.append(each_file)

In [99]:
# Check the length to see that all 122 files got appended
len(myfiles)

122

In [100]:
# Check results of myfiles
myfiles[0]

'scripts/s2e07.csv'

In [101]:
# Create data frame and loop through every file
all_scripts = pd.DataFrame(columns=['Character', 'Line', 'Line_Number', 'Episode'])
for file in myfiles:
    script = pd.read_csv(file)
    script['Line_Number'] = script.index
    script['Episode'] = file.replace('scripts/', '').replace('.csv', '')
    all_scripts = all_scripts.append(script)
    

In [102]:
# There are three two part episodes. Find halfway count in line so that they can be split
# I know this won't be the actual cut point and could cause some issues in the analysis but I'm 
# concerned the different sized documents could cause issues
s6e01_count = (all_scripts[ all_scripts['Episode'] == 's6e01']['Line'].nunique())//2
s6e20_count = (all_scripts[ all_scripts['Episode'] == 's6e20']['Line'].nunique())//2
s7e12_count = (all_scripts[ all_scripts['Episode'] == 's7e12']['Line'].nunique())//2
print(s6e01_count, s6e20_count, s7e12_count)

456 417 452


In [103]:
# Split two part episodes into separate parts
all_scripts['Episode_Split'] = np.where((all_scripts['Episode'] == 's6e01') & (all_scripts['Line_Number'] < s6e01_count), 's6e01p1',
                              np.where((all_scripts['Episode'] == 's6e01') & (all_scripts['Line_Number'] >= s6e01_count), 's6e01p2', 
                              np.where((all_scripts['Episode'] == 's6e20') & (all_scripts['Line_Number'] < s6e20_count), 's6e20p1',
                              np.where((all_scripts['Episode'] == 's6e20') & (all_scripts['Line_Number'] >= s6e20_count), 's6e20p2',
                              np.where((all_scripts['Episode'] == 's7e12') & (all_scripts['Line_Number'] < s7e12_count), 's7e12p1',
                              np.where((all_scripts['Episode'] == 's7e12') & (all_scripts['Line_Number'] >= s7e12_count), 's7e12p2', 
                                 all_scripts['Episode']))))))
                                       

In [104]:
all_scripts = all_scripts.sort_values(['Episode_Split', 'Line_Number'])

In [105]:
# Pickle data set
with open('all_scripts.pickle', 'wb') as to_write:
        pickle.dump(all_scripts, to_write)

In [106]:
# Open to check that pickle worked
with open('all_scripts.pickle','rb') as read_file:
    scripts = pickle.load(read_file)

scripts.head()

Unnamed: 0,Character,Line,Line_Number,Episode,Episode_Split
0,Leslie Knope,Hello.,0,s1e01,s1e01
1,Leslie Knope,Hi.,1,s1e01,s1e01
2,Leslie Knope,"My name is Leslie Knope, and I work for the Pa...",2,s1e01,s1e01
3,Leslie Knope,Can I ask you a few questions?,3,s1e01,s1e01
4,Leslie Knope,"Would you say that you are, ""Enjoying yourself...",4,s1e01,s1e01


### Create Episode Data Set

In [107]:
scripts_no_char = scripts.drop(columns = ['Character', 'Line_Number'])
scripts_no_char.head()

Unnamed: 0,Line,Episode,Episode_Split
0,Hello.,s1e01,s1e01
1,Hi.,s1e01,s1e01
2,"My name is Leslie Knope, and I work for the Pa...",s1e01,s1e01
3,Can I ask you a few questions?,s1e01,s1e01
4,"Would you say that you are, ""Enjoying yourself...",s1e01,s1e01


In [108]:
scripts_no_char['Episode_Text'] = scripts_no_char.groupby('Episode_Split')['Line'].transform(lambda x : ' '.join(x)) 
episodes = scripts_no_char.drop(columns = 'Line')
episodes.drop_duplicates(inplace = True)
episodes.reset_index(drop=True, inplace = True)
episodes.head()


Unnamed: 0,Episode,Episode_Split,Episode_Text
0,s1e01,s1e01,"Hello. Hi. My name is Leslie Knope, and I work..."
1,s1e02,s1e02,"Well, one of the funner things that we do here..."
2,s1e03,s1e03,"Okay, now, see, here's a good example of a pla..."
3,s1e04,s1e04,"So, we've been called out to this hiking trail..."
4,s1e05,s1e05,"In a town as old as Pawnee, there's a lot of h..."


In [109]:
episodes.tail(25)

Unnamed: 0,Episode,Episode_Split,Episode_Text
100,s6e10,s6e10,"Hello, I am Leslie Knope, and I am here to int..."
101,s6e11,s6e11,I've got to say that this report is quite impr...
102,s6e12,s6e12,"What do you mean, you can't? Look I am throwin..."
103,s6e13,s6e13,"""Leslie Knope and Ben Wyatt were married one y..."
104,s6e14,s6e14,"Okay, Ron. Enough's enough. Let's talk plans f..."
105,s6e15,s6e15,"Okay, I don't want to over-hype this, but pres..."
106,s6e16,s6e16,"Aw, babe, you've gone crazy. The only thing I ..."
107,s6e17,s6e17,God! Sorry! Sorry! What's happening? I can't h...
108,s6e18,s6e18,"I'd have to check, but I'm pretty sure we can ..."
109,s6e19,s6e19,So what do we got so far? We need big ticket i...


In [110]:
with open('episodes.pickle', 'wb') as to_write:
        pickle.dump(episodes, to_write)

In [111]:
with open('episodes.pickle','rb') as read_file:
    episodes = pickle.load(read_file)

In [112]:
episodes.head()

Unnamed: 0,Episode,Episode_Split,Episode_Text
0,s1e01,s1e01,"Hello. Hi. My name is Leslie Knope, and I work..."
1,s1e02,s1e02,"Well, one of the funner things that we do here..."
2,s1e03,s1e03,"Okay, now, see, here's a good example of a pla..."
3,s1e04,s1e04,"So, we've been called out to this hiking trail..."
4,s1e05,s1e05,"In a town as old as Pawnee, there's a lot of h..."


### Create Leslie Episode Data Set

In [117]:
leslie = all_scripts[ all_scripts['Character'] == 'Leslie Knope' ]
leslie.head()

Unnamed: 0,Character,Line,Line_Number,Episode,Episode_Split
0,Leslie Knope,Hello.,0,s1e01,s1e01
1,Leslie Knope,Hi.,1,s1e01,s1e01
2,Leslie Knope,"My name is Leslie Knope, and I work for the Pa...",2,s1e01,s1e01
3,Leslie Knope,Can I ask you a few questions?,3,s1e01,s1e01
4,Leslie Knope,"Would you say that you are, ""Enjoying yourself...",4,s1e01,s1e01


In [118]:
leslie['Episode_Text'] = leslie.groupby('Episode_Split')['Line'].transform(lambda x : ' '.join(x)) 
leslie_episodes = leslie.drop(columns = ['Line', 'Line_Number'])
leslie_episodes.drop_duplicates(inplace = True)
leslie_episodes.reset_index(drop=True, inplace = True)
leslie_episodes.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie['Episode_Text'] = leslie.groupby('Episode_Split')['Line'].transform(lambda x : ' '.join(x))


Unnamed: 0,Character,Episode,Episode_Split,Episode_Text
0,Leslie Knope,s1e01,s1e01,"Hello. Hi. My name is Leslie Knope, and I work..."
1,Leslie Knope,s1e02,s1e02,"Well, one of the funner things that we do here..."
2,Leslie Knope,s1e03,s1e03,The Parks Department has so many programs. Jer...
3,Leslie Knope,s1e04,s1e04,"I don't believe it. Oh, my God. It's real. Hey..."
4,Leslie Knope,s1e05,s1e05,"In a town as old as Pawnee, there's a lot of h..."


In [119]:
with open('Leslie_all.pickle', 'wb') as to_write:
        pickle.dump(leslie_episodes, to_write)

In [120]:
with open('Leslie_all.pickle','rb') as read_file:
    leslie_episodes = pickle.load(read_file)

leslie_episodes.head()

Unnamed: 0,Character,Episode,Episode_Split,Episode_Text
0,Leslie Knope,s1e01,s1e01,"Hello. Hi. My name is Leslie Knope, and I work..."
1,Leslie Knope,s1e02,s1e02,"Well, one of the funner things that we do here..."
2,Leslie Knope,s1e03,s1e03,The Parks Department has so many programs. Jer...
3,Leslie Knope,s1e04,s1e04,"I don't believe it. Oh, my God. It's real. Hey..."
4,Leslie Knope,s1e05,s1e05,"In a town as old as Pawnee, there's a lot of h..."


### Create Ron Episode Data Set

In [123]:
ron = all_scripts[ all_scripts['Character'] == 'Ron Swanson' ]
ron.head()

Unnamed: 0,Character,Line,Line_Number,Episode,Episode_Split
38,Ron Swanson,Tonight is our next monthly community outreach...,38,s1e01,s1e01
40,Ron Swanson,That is tonight.,40,s1e01,s1e01
42,Ron Swanson,Leslie will be running it.,42,s1e01,s1e01
43,Ron Swanson,And we need one more.,43,s1e01,s1e01
44,Ron Swanson,Who wants in?,44,s1e01,s1e01


In [124]:
ron['Episode_Text'] = ron.groupby('Episode_Split')['Line'].transform(lambda x : ' '.join(x)) 
ron_episodes = ron.drop(columns = ['Line', 'Line_Number'])
ron_episodes.drop_duplicates(inplace = True)
ron_episodes.reset_index(drop=True, inplace = True)
ron_episodes.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ron['Episode_Text'] = ron.groupby('Episode_Split')['Line'].transform(lambda x : ' '.join(x))


Unnamed: 0,Character,Episode,Episode_Split,Episode_Text
0,Ron Swanson,s1e01,s1e01,Tonight is our next monthly community outreach...
1,Ron Swanson,s1e02,s1e02,"Uh, sure, Paul. What can I do for you? Yeah, a..."
2,Ron Swanson,s1e03,s1e03,"No comment. Hey, Haverford, maybe one day you'..."
3,Ron Swanson,s1e04,s1e04,Go to jail? What's going on? Put it in an emai...
4,Ron Swanson,s1e05,s1e05,The only reason anybody's going to this thing ...


In [125]:
with open('Ron_all.pickle', 'wb') as to_write:
        pickle.dump(ron_episodes, to_write)

In [126]:
with open('Ron_all.pickle','rb') as read_file:
    ron_episodes = pickle.load(read_file)

ron_episodes.head()

Unnamed: 0,Character,Episode,Episode_Split,Episode_Text
0,Ron Swanson,s1e01,s1e01,Tonight is our next monthly community outreach...
1,Ron Swanson,s1e02,s1e02,"Uh, sure, Paul. What can I do for you? Yeah, a..."
2,Ron Swanson,s1e03,s1e03,"No comment. Hey, Haverford, maybe one day you'..."
3,Ron Swanson,s1e04,s1e04,Go to jail? What's going on? Put it in an emai...
4,Ron Swanson,s1e05,s1e05,The only reason anybody's going to this thing ...


In [128]:
ron_episodes['Episode_Split'].unique()

array(['s1e01', 's1e02', 's1e03', 's1e04', 's1e05', 's1e06', 's2e01',
       's2e02', 's2e04', 's2e05', 's2e06', 's2e07', 's2e08', 's2e09',
       's2e10', 's2e11', 's2e12', 's2e13', 's2e14', 's2e15', 's2e16',
       's2e17', 's2e18', 's2e19', 's2e20', 's2e21', 's2e22', 's2e23',
       's2e24', 's3e01', 's3e02', 's3e03', 's3e04', 's3e05', 's3e06',
       's3e07', 's3e08', 's3e09', 's3e10', 's3e11', 's3e12', 's3e13',
       's3e14', 's3e15', 's3e16', 's4e01', 's4e02', 's4e03', 's4e04',
       's4e05', 's4e06', 's4e07', 's4e08', 's4e09', 's4e10', 's4e11',
       's4e12', 's4e13', 's4e14', 's4e15', 's4e16', 's4e17', 's4e18',
       's4e19', 's4e20', 's4e21', 's4e22', 's5e01', 's5e02', 's5e03',
       's5e04', 's5e05', 's5e06', 's5e07', 's5e08', 's5e09', 's5e10',
       's5e11', 's5e12', 's5e13', 's5e14', 's5e15', 's5e16', 's5e17',
       's5e18', 's5e19', 's5e20', 's5e21', 's5e22', 's6e01p1', 's6e01p2',
       's6e02', 's6e03', 's6e04', 's6e05', 's6e06', 's6e07', 's6e08',
       's6e09', 

In [217]:
leslie_ron = all_scripts[ all_scripts['Character'].isin(['Leslie Knope', 'Ron Swanson']) ]
leslie_ron.reset_index(inplace = True)
leslie_ron.head()

Unnamed: 0,index,Character,Line,Line_Number,Episode,Episode_Split
0,0,Leslie Knope,Hello.,0,s1e01,s1e01
1,1,Leslie Knope,Hi.,1,s1e01,s1e01
2,2,Leslie Knope,"My name is Leslie Knope, and I work for the Pa...",2,s1e01,s1e01
3,3,Leslie Knope,Can I ask you a few questions?,3,s1e01,s1e01
4,4,Leslie Knope,"Would you say that you are, ""Enjoying yourself...",4,s1e01,s1e01


In [218]:
leslie_ron['Line_Diff'] = leslie_ron.Line_Number  - leslie_ron.Line_Number.shift(1)
leslie_ron['Character_Change'] = leslie_ron['Character'].shift(1, fill_value=leslie_ron['Character'].head(1)) != leslie_ron['Character']
leslie_ron.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron['Line_Diff'] = leslie_ron.Line_Number  - leslie_ron.Line_Number.shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron['Character_Change'] = leslie_ron['Character'].shift(1, fill_value=leslie_ron['Character'].head(1)) != leslie_ron['Character']


Unnamed: 0,index,Character,Line,Line_Number,Episode,Episode_Split,Line_Diff,Character_Change
0,0,Leslie Knope,Hello.,0,s1e01,s1e01,,False
1,1,Leslie Knope,Hi.,1,s1e01,s1e01,1.0,False
2,2,Leslie Knope,"My name is Leslie Knope, and I work for the Pa...",2,s1e01,s1e01,1.0,False
3,3,Leslie Knope,Can I ask you a few questions?,3,s1e01,s1e01,1.0,False
4,4,Leslie Knope,"Would you say that you are, ""Enjoying yourself...",4,s1e01,s1e01,1.0,False


In [219]:
inc = 1

for index, row in leslie_ron.iterrows():
    if row['Character_Change'] == 1 or row['Line_Diff'] > 1:
        inc += 1
    test_val = inc
    leslie_ron.at[index,'Group'] = test_val

leslie_ron.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0,index,Character,Line,Line_Number,Episode,Episode_Split,Line_Diff,Character_Change,Group
0,0,Leslie Knope,Hello.,0,s1e01,s1e01,,False,1.0
1,1,Leslie Knope,Hi.,1,s1e01,s1e01,1.0,False,1.0
2,2,Leslie Knope,"My name is Leslie Knope, and I work for the Pa...",2,s1e01,s1e01,1.0,False,1.0
3,3,Leslie Knope,Can I ask you a few questions?,3,s1e01,s1e01,1.0,False,1.0
4,4,Leslie Knope,"Would you say that you are, ""Enjoying yourself...",4,s1e01,s1e01,1.0,False,1.0
5,5,Leslie Knope,I'm gonna put a lot of fun.,5,s1e01,s1e01,1.0,False,1.0
6,7,Leslie Knope,"Sir, this is a children's slide.",7,s1e01,s1e01,2.0,False,2.0
7,8,Leslie Knope,You're not allowed to sleep in here.,8,s1e01,s1e01,1.0,False,2.0
8,10,Leslie Knope,"You know, when I first tell people that I work...",10,s1e01,s1e01,2.0,False,3.0
9,11,Leslie Knope,"""The government.""",11,s1e01,s1e01,1.0,False,3.0


In [222]:

leslie_ron['Max_Line'] = leslie_ron.groupby('Group')['Line_Number'].transform(max)
leslie_ron['Min_Line'] = leslie_ron.groupby('Group')['Line_Number'].transform(min)
leslie_ron['Episode_Text'] = leslie_ron.groupby('Group')['Line'].transform(lambda x : ' '.join(x))
leslie_ron.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron['Max_Line'] = leslie_ron.groupby('Group')['Line_Number'].transform(max)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron['Min_Line'] = leslie_ron.groupby('Group')['Line_Number'].transform(min)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron['Episode_Text'] = leslie_ron

Unnamed: 0,index,Character,Line,Line_Number,Episode,Episode_Split,Line_Diff,Character_Change,Group,Max_Line,Min_Line,Episode_Text
0,0,Leslie Knope,Hello.,0,s1e01,s1e01,,False,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
1,1,Leslie Knope,Hi.,1,s1e01,s1e01,1.0,False,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
2,2,Leslie Knope,"My name is Leslie Knope, and I work for the Pa...",2,s1e01,s1e01,1.0,False,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
3,3,Leslie Knope,Can I ask you a few questions?,3,s1e01,s1e01,1.0,False,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
4,4,Leslie Knope,"Would you say that you are, ""Enjoying yourself...",4,s1e01,s1e01,1.0,False,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."


In [223]:
leslie_ron.drop(columns = ['index', 'Line', 'Line_Number', 'Line_Diff', 'Character_Change'], inplace = True)
leslie_ron.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,Character,Episode,Episode_Split,Group,Max_Line,Min_Line,Episode_Text
0,Leslie Knope,s1e01,s1e01,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
1,Leslie Knope,s1e01,s1e01,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
2,Leslie Knope,s1e01,s1e01,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
3,Leslie Knope,s1e01,s1e01,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
4,Leslie Knope,s1e01,s1e01,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."


In [226]:
leslie_ron.drop_duplicates(inplace = True)
leslie_ron.head(25)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron.drop_duplicates(inplace = True)


Unnamed: 0,Character,Episode,Episode_Split,Group,Max_Line,Min_Line,Episode_Text
0,Leslie Knope,s1e01,s1e01,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work..."
6,Leslie Knope,s1e01,s1e01,2.0,8,7,"Sir, this is a children's slide. You're not al..."
8,Leslie Knope,s1e01,s1e01,3.0,18,10,"You know, when I first tell people that I work..."
17,Leslie Knope,s1e01,s1e01,4.0,20,20,Do you want to come this way?
18,Leslie Knope,s1e01,s1e01,5.0,22,22,"Okay, we're gonna need you to get out."
19,Leslie Knope,s1e01,s1e01,6.0,25,24,Get out of the slide. Okay?
21,Leslie Knope,s1e01,s1e01,7.0,37,27,"Here we go! Okay, wake up. Here we go. Out of ..."
32,Ron Swanson,s1e01,s1e01,8.0,38,38,Tonight is our next monthly community outreach...
33,Leslie Knope,s1e01,s1e01,9.0,39,39,And that is tonight.
34,Ron Swanson,s1e01,s1e01,10.0,40,40,That is tonight.


In [231]:
leslie_ron['Prev_Line_Diff'] = leslie_ron.Min_Line  - leslie_ron.Max_Line.shift(1)
leslie_ron['Next_Line_Diff'] = leslie_ron.Min_Line.shift(-1) - leslie_ron.Max_Line
leslie_ron.head(25)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron['Prev_Line_Diff'] = leslie_ron.Min_Line  - leslie_ron.Max_Line.shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron['Next_Line_Diff'] = leslie_ron.Min_Line.shift(-1) - leslie_ron.Max_Line


Unnamed: 0,Character,Episode,Episode_Split,Group,Max_Line,Min_Line,Episode_Text,Agg_Line_Diff,Next_Line_Diff,Prev_Line_Diff
0,Leslie Knope,s1e01,s1e01,1.0,5,0,"Hello. Hi. My name is Leslie Knope, and I work...",-2.0,2.0,
6,Leslie Knope,s1e01,s1e01,2.0,8,7,"Sir, this is a children's slide. You're not al...",-2.0,2.0,2.0
8,Leslie Knope,s1e01,s1e01,3.0,18,10,"You know, when I first tell people that I work...",-2.0,2.0,2.0
17,Leslie Knope,s1e01,s1e01,4.0,20,20,Do you want to come this way?,-2.0,2.0,2.0
18,Leslie Knope,s1e01,s1e01,5.0,22,22,"Okay, we're gonna need you to get out.",-2.0,2.0,2.0
19,Leslie Knope,s1e01,s1e01,6.0,25,24,Get out of the slide. Okay?,-2.0,2.0,2.0
21,Leslie Knope,s1e01,s1e01,7.0,37,27,"Here we go! Okay, wake up. Here we go. Out of ...",-1.0,1.0,2.0
32,Ron Swanson,s1e01,s1e01,8.0,38,38,Tonight is our next monthly community outreach...,-1.0,1.0,1.0
33,Leslie Knope,s1e01,s1e01,9.0,39,39,And that is tonight.,-1.0,1.0,1.0
34,Ron Swanson,s1e01,s1e01,10.0,40,40,That is tonight.,-1.0,1.0,1.0


In [234]:
leslie_ron_convo = leslie_ron[ (leslie_ron['Prev_Line_Diff'] == 1.0) | (leslie_ron['Next_Line_Diff'] == 1.0)]
leslie_ron_convo.head(10)

Unnamed: 0,Character,Episode,Episode_Split,Group,Max_Line,Min_Line,Episode_Text,Agg_Line_Diff,Next_Line_Diff,Prev_Line_Diff
21,Leslie Knope,s1e01,s1e01,7.0,37,27,"Here we go! Okay, wake up. Here we go. Out of ...",-1.0,1.0,2.0
32,Ron Swanson,s1e01,s1e01,8.0,38,38,Tonight is our next monthly community outreach...,-1.0,1.0,1.0
33,Leslie Knope,s1e01,s1e01,9.0,39,39,And that is tonight.,-1.0,1.0,1.0
34,Ron Swanson,s1e01,s1e01,10.0,40,40,That is tonight.,-1.0,1.0,1.0
35,Leslie Knope,s1e01,s1e01,11.0,41,41,Right.,-1.0,1.0,1.0
36,Ron Swanson,s1e01,s1e01,12.0,45,42,Leslie will be running it. And we need one mor...,-2.0,2.0,1.0
174,Leslie Knope,s1e01,s1e01,64.0,334,334,"Ron, please.",-1.0,1.0,3.0
175,Ron Swanson,s1e01,s1e01,65.0,336,335,No. No way.,-1.0,1.0,1.0
177,Leslie Knope,s1e01,s1e01,66.0,340,337,"Come on, Ron. I've been a loyal foot soldier. ...",-1.0,1.0,1.0
181,Ron Swanson,s1e01,s1e01,67.0,341,341,Is that a travel pillow around your neck?,-1.0,1.0,1.0


In [237]:
leslie_ron_convo_final = leslie_ron_convo[['Character', 'Episode', 'Episode_Split', 'Episode_Text']]
leslie_ron_convo_final.reset_index(inplace = True)
leslie_ron_convo_final

Unnamed: 0,index,Character,Episode,Episode_Split,Episode_Text
0,21,Leslie Knope,s1e01,s1e01,"Here we go! Okay, wake up. Here we go. Out of ..."
1,32,Ron Swanson,s1e01,s1e01,Tonight is our next monthly community outreach...
2,33,Leslie Knope,s1e01,s1e01,And that is tonight.
3,34,Ron Swanson,s1e01,s1e01,That is tonight.
4,35,Leslie Knope,s1e01,s1e01,Right.
...,...,...,...,...,...
2156,22920,Ron Swanson,s7e12,s7e12p2,"Well, I have to make a couple more adjustments."
2157,22921,Leslie Knope,s7e12,s7e12p2,"Babe, I don't want to seem dramatic, but I hav..."
2158,23068,Leslie Knope,s7e12,s7e12p2,A [bleep] library?
2159,23069,Ron Swanson,s7e12,s7e12p2,"Yeah, that should do it. The swing is fixed!"


In [238]:
with open('Leslie_Ron_Convo.pickle', 'wb') as to_write:
        pickle.dump(leslie_ron_convo_final, to_write)

In [239]:
leslie_ron_mention = all_scripts[ all_scripts['Character'].isin(['Leslie Knope', 'Ron Swanson']) ]
leslie_ron_mention.reset_index(inplace = True)
leslie_ron_mention.head()

Unnamed: 0,index,Character,Line,Line_Number,Episode,Episode_Split
0,0,Leslie Knope,Hello.,0,s1e01,s1e01
1,1,Leslie Knope,Hi.,1,s1e01,s1e01
2,2,Leslie Knope,"My name is Leslie Knope, and I work for the Pa...",2,s1e01,s1e01
3,3,Leslie Knope,Can I ask you a few questions?,3,s1e01,s1e01
4,4,Leslie Knope,"Would you say that you are, ""Enjoying yourself...",4,s1e01,s1e01


In [244]:
leslie_ron_mention['Leslie_Mentions_Ron'] = np.where((leslie_ron_mention['Line'].str.contains('Ron', 'ron')) & (leslie_ron_mention['Character'] == 'Leslie Knope'), True, False)
leslie_ron_mention['Ron_Mentions_Leslie'] = np.where((leslie_ron_mention['Line'].str.contains('Leslie', 'leslie')) & (leslie_ron_mention['Character'] == 'Ron Swanson'), True, False)
true = leslie_ron_mention[leslie_ron_mention['Ron_Mentions_Leslie'] == 1]
true.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 145 entries, 36 to 22916
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   index                145 non-null    int64 
 1   Character            145 non-null    object
 2   Line                 145 non-null    object
 3   Line_Number          145 non-null    object
 4   Episode              145 non-null    object
 5   Episode_Split        145 non-null    object
 6   Leslie_Mentions_Ron  145 non-null    bool  
 7   Ron_Mentions_Leslie  145 non-null    bool  
dtypes: bool(2), int64(1), object(5)
memory usage: 8.2+ KB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron_mention['Leslie_Mentions_Ron'] = np.where((leslie_ron_mention['Line'].str.contains('Ron', 'ron')) & (leslie_ron_mention['Character'] == 'Leslie Knope'), True, False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leslie_ron_mention['Ron_Mentions_Leslie'] = np.where((leslie_ron_mention['Line'].str.contains('Leslie', 'leslie')) & (leslie_ron_mention['Character'] == 'Ron Swanson'), True, False)


In [247]:
with open('Leslie_Ron_Mention.pickle', 'wb') as to_write:
        pickle.dump(leslie_ron_mention, to_write)