In [2]:
import numpy as np
import pandas as pd

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import string
from gensim.models import Word2Vec
from sklearn.cluster import KMeans

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

import matplotlib.pyplot as plt
%matplotlib inline

import plotly.express as px
from sklearn.decomposition import PCA


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/laurenshores/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/laurenshores/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/laurenshores/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
dff = pd.read_csv("AmzingRceTeams_Data.csv")

In [4]:
dff.shape

(313, 34)

In [5]:
def preprocess(text):
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text.lower())
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word.isalpha() and word not in stop_words]
    return ' '.join(tokens)

dff['cleaned_occupation_x'] = dff['Occupation_x'].apply(preprocess)
dff['cleaned_occupation_y'] = dff['Occupation_y'].apply(preprocess)

In [6]:
dff

Unnamed: 0,Age_x,Age_y,LGBT_team,AficanAm_team,Friend_team,Family_team,Stranger_team,Female_team,Male_team,Coed_team,...,State_y,Same_State,Occupation_x,Occupation_y,profile_txt,profile_txt2,Top_3,Y,cleaned_occupation_x,cleaned_occupation_y
0,28,28,0,0,0,0,0,0,0,0,...,Connecticut,True,Military folk,Military folk,"[""I'd most like to receive a letter from: Nobo...",['no profile info'],0,0,military folk,military folk
1,28,27,0,0,1,0,0,1,0,0,...,Texas,True,Teachers,Teachers,"[""I'd most like to receive a letter from: Caro...",['no profile info'],0,0,teacher,teacher
2,43,42,0,0,1,0,0,1,0,0,...,Maryland,False,Missing,Missing,"[""I'd most like to receive a letter from: My k...",['no profile info'],0,0,missing,missing
3,65,60,0,0,0,0,0,0,0,1,...,Texas,True,Air Force Pilot,Retired,"[""I'd most like to receive a letter from: My k...",['no profile info'],0,0,air force pilot,retired
4,32,27,0,0,0,0,0,0,0,1,...,California,True,Actor,Bartender,"[""I'd most like to receive a letter from: Brot...",['no profile info'],0,0,actor,bartender
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,38,38,0,0,0,0,0,0,0,1,...,California,True,Special Education Teacher,Hospitality Account Manager,[],['no profile info'],0,0,special education teacher,hospitality account manager
309,54,28,0,0,0,1,0,0,0,1,...,Texas,True,Paint Contractor/ Speech Pathologist,Private Flight Attendant/Influencer,['no profile info'],['no profile info'],0,0,paint speech pathologist,private flight
310,48,25,0,0,0,1,0,0,1,0,...,New York,False,Teaching Assistant/Assistant Football Coach,Senior Manager of client strategy analytics,['Notes:\n\n1: Leg 7 featured two Roadblocks. ...,['no profile info'],1,0,teaching football coach,senior manager client strategy analytics
311,42,43,0,0,1,0,0,0,1,0,...,Idaho,True,Auto Claims Training Specialist,Delivery Driver,['Notes:\n\n1: Leg 7 featured two Roadblocks. ...,['no profile info'],1,0,auto claim training specialist,delivery driver


In this notebook, I want to focus on harnessing text from surveys and Q/A's completed by the contestants. My hope is that this text will reveal certain traits or characteristics about the teams/ their skills and personalities, that could be used to enhance the model to predict winners.

First, I had to acknowledge that I did not have profile data for every team. And then to make a decision if what I had was enough to even move forward with this effort.

The other thing I had to acknowledge is that there were several different formats for this profile text, and also some noisy text that was more of a summary of the race than contestant responses. With this in mind (how noisy and inconsistent this field was), I had to think about if moving forward we be worth the effort.

Lastly, if I did decide to pursue this, what would be the way to go about wringing information from this data?

After thinking about different possible approaches to getting some meaning out of the data including: sentiment analysis, topic modeling, and text summarization, I realized that none of these would readily put me on the path to creating features that to go into my prediction model.

I decided that the best thing to do is think about some information I'd like to know, extract that information where available, and then create a feature around it. For example: 
* What are the contestants strengths?
* What are their weaknesses?
* How is their relationship with their partner? (how long, do they butt heads, is it complimentary)
* What is their personality/disposition? (optimisitic, hard working, relentless, used to getting their own way, competitive, etc)?

Of course these need to be fined tuned. But I think coming up with a few questions that center around a desired feature, and then trying to glean this info from the text may be the best approach. So maybe building a Q/A model is the right way to go about this. HOW HARD WOULD IT BE FOR ME TO BUILD A T5 MODEL? WOULD BERT SUFFICE HERE? ARE THERE PRE-TRAINED MODELS I CAN JUST USE?

types of response formatting
 * profile1 is [] or '['no profile info']
  * starts with "I'd most like to receive a letter from:"
  * starts with "Notes:
   * if Notes, then actual answers are in profile2 OR there may not be a response
  * Starts with 'Name:, then it's a short description
 * profiles with just newline characters and list brackets. Clean these out first. ['\n,'\n']
  * nothing in profile2 either. just missing data

In [None]:
#dff[ ( dff['profile_txt'] != "[]") & (dff['profile_txt'] != "['no profile info']")]['profile_txt'].tail(20) #"['no profile info']"
#dff[  "Notes" in dff['profile_txt'] ]['profile_txt']

In [149]:
dff.iloc[3,-6] 

'["I\'d most like to receive a letter from: My kids\\nComfort item from home I\'ll miss the most: Nothing\\nHobby or activity I\'ll miss the most: Flying\\nI\'d most like to receive a phone call from: NASA, with a space slot\\nSunday paper I read most often: Dallas Morning News\\nFavorite TV show: History Channel\\nFavorite Movie: Flying Tigers\\nFavorite Actor: Alan Ladd\\nFavorite Actress: Meg Ryan\\nFavorite Music: Country & Western\\n", "I\'d most like to receive a letter from: President Bush\\nComfort item from home I\'ll miss the most: Total bathroom facility\\nHobby or activity I\'ll miss the most: Flying (as a pilot)\\nI\'d most like to receive a phone call from: Our grandbabies\\nSunday paper I read most often: Dallas Morning News\\nFavorite TV show: Ally McBeal\\nFavorite Movie: Auntie Mame\\nFavorite Actor: Mel Gibson\\nFavorite Actress: Michelle Pfeiffer\\nFavorite Music: 60s and 70s sounds\\n", \'Dave & Margaretta are the fifth team introduced at the Starting Line at the B

In [150]:
# Clean up the text to see how many samples we actually have
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Initialize lemmatizer and stop words
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

profile_df = dff[['Top_3', 'Y', 'profile_txt', 'profile_txt2']]
def replace_if_starts_with(original_string, check_string, replacement_string):
    if original_string.startswith(check_string):
        return replacement_string
    return original_string


def preprocess_text(text):
    
    #use regex to get rid of any text after ""After Phil Keoghan officially starts..."
    text = re.sub(r"After Phil Keoghan officially starts.*", '', text)
    
    text = text.replace("\\n", " ")
    # Remove non-alphanumeric characters
    #text = re.sub(r'\W', ' ', text)
    
    # alpha chars
    text = re.sub(r'&amp', ' ', text)
    text = re.sub('[^a-zA-Z]+', ' ', text)
    
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stop words and lemmatize
    tokens = [word.lower() for word in tokens] #if word.lower() not in stop_words]
    mystr = ' '.join(tokens)
    
    mystr = replace_if_starts_with(mystr.strip(), "no profile info", "")
    mystr = replace_if_starts_with(mystr, "note", "")
    mystr = mystr.strip()
    
    return mystr
profile_df['p1'] = profile_df['profile_txt'].apply(lambda x: preprocess_text(x))
profile_df['p2'] = profile_df['profile_txt2'].apply(lambda x: preprocess_text(x))
profile_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  profile_df['p1'] = profile_df['profile_txt'].apply(lambda x: preprocess_text(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  profile_df['p2'] = profile_df['profile_txt2'].apply(lambda x: preprocess_text(x))


Unnamed: 0,Top_3,Y,profile_txt,profile_txt2,p1,p2
0,0,0,"[""I'd most like to receive a letter from: Nobody\nComfort item from home I'll miss the most: My music\nHobby or activity I'll miss the most: Starting the softball season\nI'd most like to receive a phone call from: My sister\nSunday paper I read most often: Hartford Courant\nFavorite TV show: That '70s Show\nFavorite Movie: Fast Times at Ridgemont High\nFavorite Actor: Nicolas Cage\nFavorite Actress: Meg Ryan\nFavorite Music: Alternative\n"", ""I'd most like to receive a letter from: No one\nComfort item from home I'll miss the most: Heavy blanket\nHobby or activity I'll miss the most: Swimming\nI'd most like to receive a phone call from: Mom\nSunday paper I read most often: Hartford Courant\nFavorite TV show: Survivor\nFavorite Movie: Gladiator\nFavorite Actor: Tom Hanks\nFavorite Actress: Jessica Lange\nFavorite Music: Alternative\n"", 'Matt & Ana are the sixth team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their first confessional, Ana describes herself as ""high-strung and bossy"", but that it\'s those qualities Matt likes in her. Matt compliments his wife, saying she is ""smart"" and has ""a good head on her shoulders"", but almost never listens to his gut instincts.\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. They decide to head to the subway to find a phone. Using a payphone, they find that the flights are located at John F. Kennedy International Airport. Matt & Ana take the subway to get to JFK, where Ana panics, hoping they don't end up being the first team eliminated, though Matt attempts to console her on the matter.\n"", 'Matt & Ana ultimately take a slow form of transportation, ending up on the third flight into Johannesburg along with Kevin & Drew, Nancy & Emily, and Paul & Amie.\n', 'After landing in Johannesburg, they pick up the second clue informing them to sign up for a charter flight to Livingstone, Zambia, and get their next clue from a car waiting outside of Livingstone Airport. Matt & Ana once again find themselves on the final flight with Kevin & Drew, Nancy & Emily, and Paul & Amie.\n', ""Matt & Ana opt to drive themselves to their next destination at Knife's Edge Bridge at Victoria Falls. After easily getting to Victoria Falls and getting their next clue to Batoka Gorge, they attempt to find a local who can point them in the right direction, but the locals do not know where the gorge is, prompting Matt to make some derogatory remarks about them. After struggling to find the gorge, the married couple end up being the second-to-last team to arrive, and learn of the Detour. They choose to perform Air, which has teams zipline down the gorge, then bungee jump. After completing the task, and learning the Pit Stop is at Songwe Village. Both Matt & Ana and Kevin & Drew end up racing to get to the Pit Stop before the other. Despite Nancy & Emily getting lost on they way as well, Matt & Ana are the last team to arrive, and become the first team to be eliminated from The Amazing Race.\n"", ""Notes:\n\n1: There was an unaired Roadblock where one teammate had to cook a whole ostrich egg for the whole team to eat. While they weren't shown doing the roadblock in the DVD release, Matt later confirmed that he did the Roadblock.""]",['no profile info'],i d most like to receive a letter from nobody comfort item from home i ll miss the most my music hobby or activity i ll miss the most starting the softball season i d most like to receive a phone call from my sister sunday paper i read most often hartford courant favorite tv show that s show favorite movie fast times at ridgemont high favorite actor nicolas cage favorite actress meg ryan favorite music alternative i d most like to receive a letter from no one comfort item from home i ll miss the most heavy blanket hobby or activity i ll miss the most swimming i d most like to receive a phone call from mom sunday paper i read most often hartford courant favorite tv show survivor favorite movie gladiator favorite actor tom hanks favorite actress jessica lange favorite music alternative matt ana are the sixth team introduced at the starting line at the bethesda fountain in central park new york city in their first confessional ana describes herself as high strung and bossy but that it s those qualities matt likes in her matt compliments his wife saying she is smart and has a good head on her shoulders but almost never listens to his gut instincts,
1,0,0,"[""I'd most like to receive a letter from: Carole\nComfort item from home I'll miss the most: My bed\nHobby or activity I'll miss the most: Sitting on the back porch\nI'd most like to receive a phone call from: Will\nSunday paper I read most often: None\nFavorite TV show: Friends\nFavorite Movie: Legends of the Fall\nFavorite Actor: Brad Pitt\nFavorite Actress: Julia Roberts\nFavorite Music: Alternative\n"", ""I'd most like to receive a letter from: Martin and Julie, my brother and sister-in-law\nComfort item from home I'll miss the most: My bed\nHobby or activity I'll miss the most: Coaching\nI'd most like to receive a phone call from: Mom and Dad\nSunday paper I read most often: Houston Post\nFavorite TV show: Who Wants to Be a Millionaire\nFavorite Movie: Notting Hill\nFavorite Actor: Matt Damon\nFavorite Actress: Julia Roberts\nFavorite Music: Spanish\n"", 'Kim & Leslie are the third team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their confessional, Kim states that she and Leslie are ""mentally prepared to handle anything that comes along"", while Leslie states her major concern is dying over the course of the race.\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. The roommates decide to try to hail a cab in the busy New York City streets. They're unsuccessful with their first attempt, but find a cab that will take them to John F. Kennedy International Airport.\n"", 'Despite intending to head to Alitalia airlines, their cab driver takes them to where Swiss Air is located instead. This benefits the women, as they are on the second flight out to Johannesburg, with Dave & Margaretta on board as well, rather than on the later Alitalia flight.\n', 'After landing in Johannesburg, they pick up the second clue informing them to sign up for a charter flight to Livingstone, Zambia, and get their next clue from a car waiting outside of Livingstone Airport. Kim & Leslie end up on the third charter flight with Dave & Margaretta.\n', 'Kim & Leslie decide to drive themselves to Victoria Falls, rather than hire a driver. Kim & Leslie are the second team to get the clue at Victoria Falls, and decide against going for the Fast Forward, and instead go to Batoka Gorge. On their way to the gorge, they drop to 6th. Arriving at Batoka Gorge, they learn of the Detour. Kim & Leslie decide to perform Air, which will involve them ziplining down the gorge, then bungee jumping to receive their next clue.\n', ""Finishing the Detour, they are told to head to Songwe Village. At the village, Kim & Leslie find the clue for the Roadblock. The task has one member cook an ostrich egg for the whole team to eat. Kim performs the Roadblock for her team. After completing the Roadblock, they're told to head to the nearby Pit Stop to check in. Kim & Leslie finish the first leg in 6th.\n"", 'Kim & Leslie start Leg 2 at 5:33 am. Opening their clue, they are told to head to Songwe Museum. As they drive out of Songwe Village, they spot the museum. Grabbing their next clue, which informs them of the Detour. They choose Near and head to Mosi-oa-Tunya National Park to photograph animals.\n', 'After photographing three required animals, the roommates drive to Mukuni Village to exchange their photos for their next clue, a statue of the Eiffel Tower, indicating them must fly to Paris, France.\n', 'At Johannesburg International Airport, Kim & Leslie find a ticket counter, trying to find flights to Paris via Amsterdam, London, or Brussels. Amie walks up to the same counter the roommates are at, and after Kim orders four tickets, she informs the ticket attendant that Amie is not with them. This escalates as Kim, Leslie, and Amie begin to argue over the situation. The ticket attendant is able to diffuse the three women, and both teams are on the third flight to Paris.\n', 'Once they landed in Paris, as Paul & Amie wait in line for a cab, Kim & Leslie simply cut them and attempt to get into the taxi. This leads to another argument, but Kim & Leslie win as they take the cab to the Eiffel Tower. Ironically, Kim & Leslie arrive after Paul & Amie at the Eiffel Tower. Missing the clue box at the base of the tower, they head to the observation deck to look for a yellow and white flag.\n', 'After looking and asking around, a French gentleman informs Kim & Leslie that he saw a yellow and white flag at the base of the tower. Finding their next clue, it is revealed to be a Roadblock. Kim volunteers to perform the task, and heads up to the observation deck to search for something ""monumental"". In a five-way tie for last, Kim, Dave, Paul, Emily, and Karyn are all frantically searching for their next clue. Kim is the second-to-last to spot the flag on top of the Arc de Triomphe from the group, after Paul and Emily colluded together to show Lenny.\n', ""Back on the ground, she and Leslie get into a cab and head to the monument. They however make a mistake by getting out of their taxi across the street from the Arc de Triomphe, allowing the other trailing teams to check in before them. Once they've arrived at the Pit Stop, Phil informs them they are the last team to arrive and have been eliminated from The Amazing Race.\n"", 'Notes:\n\n1: There was an unaired Roadblock where one teammate had to cook a whole ostrich egg for the whole team to eat.\n\n']",['no profile info'],i d most like to receive a letter from carole comfort item from home i ll miss the most my bed hobby or activity i ll miss the most sitting on the back porch i d most like to receive a phone call from will sunday paper i read most often none favorite tv show friends favorite movie legends of the fall favorite actor brad pitt favorite actress julia roberts favorite music alternative i d most like to receive a letter from martin and julie my brother and sister in law comfort item from home i ll miss the most my bed hobby or activity i ll miss the most coaching i d most like to receive a phone call from mom and dad sunday paper i read most often houston post favorite tv show who wants to be a millionaire favorite movie notting hill favorite actor matt damon favorite actress julia roberts favorite music spanish kim leslie are the third team introduced at the starting line at the bethesda fountain in central park new york city in their confessional kim states that she and leslie are mentally prepared to handle anything that comes along while leslie states her major concern is dying over the course of the race,
2,0,0,"[""I'd most like to receive a letter from: My kids Caitlin and Michael\nComfort item from home I'll miss the most: Waterbed and hot tub\nHobby or activity I'll miss the most: Cuddling with my loved ones\nI'd most like to receive a phone call from: Caitlin and Michael or Charlie\nSunday paper I read most often: Delaware New Journal\nFavorite TV show: Weather Channel\nFavorite Movie: Casablanca\nFavorite Actor: Harrison Ford\nFavorite Actress: Susan Sarandon\nFavorite Music: Jimmy Buffet; Blues\n"", ""I'd most like to receive a letter from: My husband and kids\nComfort item from home I'll miss the most: My kids' hugs and kisses\nHobby or activity I'll miss the most: Working out at the gym\nI'd most like to receive a phone call from: My husband and kids\nSunday paper I read most often: News Journal\nFavorite TV show: Lonely Planet; Survivor\nFavorite Movie: Joe Versus the Volcano\nFavorite Actor: Kevin Spacey\nFavorite Actress: Meryl Streep\nFavorite Music: World; Blues\n"", 'Pat & Brenda are the eighth team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their confessional, Brenda says she hopes that she and Pat inspires other women with families to be themselves, while Pat states while their husbands weren\'t ""real crazy"" for them going around the world, that she and Brenda decided to apply for the show anyway.\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. The mothers decide to head toward Times Square to catch a taxi. They are able to make it on the first flight to South Africa, along with Frank & Margarita, Joe & Bill, Lenny & Karyn, and Rob & Brennan.\n"", 'After landing in Johannesburg, they pick up the second clue informing them to sign up for a charter flight to Livingstone, Zambia, and get their next clue from a car waiting outside of Livingstone Airport. Pat & Brenda end up on the second charter with Lenny & Karyn.\n', 'After they decide to hire a driver, they head to Victoria Falls, Pat & Brenda head to Batoka Gorge for their next clue, rather than perform the Fast Forward. At this point in the Race, they are currently in fifth.\n', 'Arriving at Batoka Gorge, they find the Detour. Despite Pat being scared, the mothers decide to do Air. After finishing the task, they receive their next clue informing them to head to the Pit Stop at Songwe Village. They finish the leg in 5th place.\n', 'Pat & Brenda start Leg 2 at 3:35 am. Opening their clue, they are told to head to Songwe Museum. They decide to drive from the village to find the museum. Despite asking for directions, they cannot locate the museum and choose to do the Fast Forward. Pat & Brenda head toward Bundu Adventures.\n', 'After arriving, the mothers have to whitewater raft down a part of the Zambezi River to get the Fast Forward pass. They complete the task, and earn the Fast Forward, informing them to head to the Arc de Triomphe in Paris, France. Both are excited to visit Paris.\n', 'Despite Brenda having a breakdown while in Johannesburg International Airport, they are able to get one of the first two flights to Paris. Once they arrive at Charles de Gaulle Airport in Paris, they head to the Pit Stop at the Arc de Triomphe, while the other teams head to the Eiffel Tower. After touching on the Pit Stop, they finish the leg in 1st.\n', 'Pat & Brenda started the third leg at 9:06 pm. Opening the first clue for the leg, they are told to head to La Grande Roue. After asking a local Parisian, who points out the Ferris wheel is further down the street, the mothers decide to take the subway to get there. After arriving to the site of the attraction, Pat & Brenda find the clue for the Detour. They choose Easy Walk, where they have to find Foucault pendulum and get their next clue from a cat statue. Not knowing that there are two Foucault pendulums in Paris, they head unintentionally to the incorrect location at Musée des Arts et Métiers to wait until the location opens. They look around the grounds before the location opens, seeing the pendulum, but no cat statue.\n', 'The following morning, at 10:00 am, Pat & Brenda enter the museum. They search for a cat statue, but are still unable to find it. A curator, who overheard the mothers, informs them that they are likely looking for the pendulum at the Panthéon. They exit the museum to get to the correct location.\n', ""After getting the clue at the Panthéon, they are informed to head to Hôtel de Ville, where they will need to find a person in a blue suit, who'll give them their next clue. Pat & Brenda decide to take the subway to get there.\n"", ""Once they get their clue from the man at Hôtel de Ville, they find the Roadblock. Pat volunteers to perform the task. For the Roadblock, Pat will have to wear a sanitary suit and walk through the sewers to find their clue near Place du Châtelet. After finishing the Roadblock, they learn they have to make their way to the Pit Stop at Château des Baux in Les Baux de Provence by train. Despite their hopes, however, their error with the location of the pendulum caused too big of a gap between them and the other teams. Phil informs Pat & Brenda once they've reached the mat, they've been eliminated from The Amazing Race.\n"", ""Notes: \n\n1: There was an unaired Roadblock where one teammate had to cook a whole ostrich egg for the whole team to eat. While they weren't shown doing the roadblock in the DVD release, Pat confirmed that she did the roadblock in a later interview.""]",['no profile info'],i d most like to receive a letter from my kids caitlin and michael comfort item from home i ll miss the most waterbed and hot tub hobby or activity i ll miss the most cuddling with my loved ones i d most like to receive a phone call from caitlin and michael or charlie sunday paper i read most often delaware new journal favorite tv show weather channel favorite movie casablanca favorite actor harrison ford favorite actress susan sarandon favorite music jimmy buffet blues i d most like to receive a letter from my husband and kids comfort item from home i ll miss the most my kids hugs and kisses hobby or activity i ll miss the most working out at the gym i d most like to receive a phone call from my husband and kids sunday paper i read most often news journal favorite tv show lonely planet survivor favorite movie joe versus the volcano favorite actor kevin spacey favorite actress meryl streep favorite music world blues pat brenda are the eighth team introduced at the starting line at the bethesda fountain in central park new york city in their confessional brenda says she hopes that she and pat inspires other women with families to be themselves while pat states while their husbands weren t real crazy for them going around the world that she and brenda decided to apply for the show anyway,
3,0,0,"[""I'd most like to receive a letter from: My kids\nComfort item from home I'll miss the most: Nothing\nHobby or activity I'll miss the most: Flying\nI'd most like to receive a phone call from: NASA, with a space slot\nSunday paper I read most often: Dallas Morning News\nFavorite TV show: History Channel\nFavorite Movie: Flying Tigers\nFavorite Actor: Alan Ladd\nFavorite Actress: Meg Ryan\nFavorite Music: Country & Western\n"", ""I'd most like to receive a letter from: President Bush\nComfort item from home I'll miss the most: Total bathroom facility\nHobby or activity I'll miss the most: Flying (as a pilot)\nI'd most like to receive a phone call from: Our grandbabies\nSunday paper I read most often: Dallas Morning News\nFavorite TV show: Ally McBeal\nFavorite Movie: Auntie Mame\nFavorite Actor: Mel Gibson\nFavorite Actress: Michelle Pfeiffer\nFavorite Music: 60s and 70s sounds\n"", 'Dave & Margaretta are the fifth team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their confessional, Margaretta remarks her husband can be a bit bossy, and she doesn\'t take orders very well. Dave says that his wife is an ""amazing lady"" and ""she can do anything she sets her mind to.""\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. Despite the married couple departing last from the Starting Line, they are still able to get on the second flight with Kim & Leslie.\n"", 'After landing in Johannesburg, they pick up the second clue informing them to sign up for a charter flight to Livingstone, Zambia, and get their next clue from a car waiting outside of <a href=""http://en.wikipedia.org/wiki/Livingstone_Airport"" class=""extiw"" titl']",['no profile info'],i d most like to receive a letter from my kids comfort item from home i ll miss the most nothing hobby or activity i ll miss the most flying i d most like to receive a phone call from nasa with a space slot sunday paper i read most often dallas morning news favorite tv show history channel favorite movie flying tigers favorite actor alan ladd favorite actress meg ryan favorite music country western i d most like to receive a letter from president bush comfort item from home i ll miss the most total bathroom facility hobby or activity i ll miss the most flying as a pilot i d most like to receive a phone call from our grandbabies sunday paper i read most often dallas morning news favorite tv show ally mcbeal favorite movie auntie mame favorite actor mel gibson favorite actress michelle pfeiffer favorite music s and s sounds dave margaretta are the fifth team introduced at the starting line at the bethesda fountain in central park new york city in their confessional margaretta remarks her husband can be a bit bossy and she doesn t take orders very well dave says that his wife is an amazing lady and she can do anything she sets her mind to,
4,0,0,"[""I'd most like to receive a letter from: Brothers\nComfort item from home I'll miss the most: Pillows\nHobby or activity I'll miss the most: Ice hockey; gym\nI'd most like to receive a phone call from: Mother\nSunday paper I read most often: New York Daily News\nFavorite TV show: Friends\nFavorite Movie: The Godfather\nFavorite Actor: Robert De Niro\nFavorite Actress: Meg Ryan\nFavorite Music: Rock; Alternative\n"", ""I'd most like to receive a letter from: Parents; grandmom and pop\nComfort item from home I'll miss the most: Music; pillows; massager\nHobby or activity I'll miss the most: Rollerblading; dance class; emails\nI'd most like to receive a phone call from: My sister Deana\nSunday paper I read most often: Los Angeles Times\nFavorite TV show: Friends\nFavorite Movie: Good Will Hunting\nFavorite Actor: Robert Downey Jr.\nFavorite Actress: Julia Roberts\nFavorite Music: R&B\n"", 'Paul & Amie are the second team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their confessional, they remark that the race will have an impact on their relationship.\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. Using a payphone, Paul gets information that the flights are departing from John F. Kennedy International Airport. They argue a bit over whether to take a train or cab to the airport, they take a train, however they get off at the wrong station. This mistake costs them, as they are on the third flight to Johannesburg with Kevin & Drew, Matt & Ana, and Nancy & Emily.\n"", 'After landing in Johannesburg, they pick up the second clue informing them t']",['no profile info'],i d most like to receive a letter from brothers comfort item from home i ll miss the most pillows hobby or activity i ll miss the most ice hockey gym i d most like to receive a phone call from mother sunday paper i read most often new york daily news favorite tv show friends favorite movie the godfather favorite actor robert de niro favorite actress meg ryan favorite music rock alternative i d most like to receive a letter from parents grandmom and pop comfort item from home i ll miss the most music pillows massager hobby or activity i ll miss the most rollerblading dance class emails i d most like to receive a phone call from my sister deana sunday paper i read most often los angeles times favorite tv show friends favorite movie good will hunting favorite actor robert downey jr favorite actress julia roberts favorite music r b paul amie are the second team introduced at the starting line at the bethesda fountain in central park new york city in their confessional they remark that the race will have an impact on their relationship,
...,...,...,...,...,...,...
308,0,0,[],['no profile info'],,
309,0,0,['no profile info'],['no profile info'],,
310,1,0,"['Notes:\n\n1: Leg 7 featured two Roadblocks. The team member who sat out the first Roadblock was required to perform the second one.\n\n2: Rob & Corey used their Express Pass to bypass the Roadblock on Leg 10. Before using the Express Pass, Rob elected to perform the Roadblock, and this is reflected in the total Roadblock count.']",['no profile info'],,
311,1,0,['Notes:\n\n1: Leg 7 featured two Roadblocks. The team member who sat out the first Roadblock was required to perform the second one.\n'],['no profile info'],,


In [151]:
profile_df['profile'] = np.where(profile_df['p1'] == '', profile_df['p2'], profile_df['p1'] )
profile_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  profile_df['profile'] = np.where(profile_df['p1'] == '', profile_df['p2'], profile_df['p1'] )


Unnamed: 0,Top_3,Y,profile_txt,profile_txt2,p1,p2,profile
0,0,0,"[""I'd most like to receive a letter from: Nobody\nComfort item from home I'll miss the most: My music\nHobby or activity I'll miss the most: Starting the softball season\nI'd most like to receive a phone call from: My sister\nSunday paper I read most often: Hartford Courant\nFavorite TV show: That '70s Show\nFavorite Movie: Fast Times at Ridgemont High\nFavorite Actor: Nicolas Cage\nFavorite Actress: Meg Ryan\nFavorite Music: Alternative\n"", ""I'd most like to receive a letter from: No one\nComfort item from home I'll miss the most: Heavy blanket\nHobby or activity I'll miss the most: Swimming\nI'd most like to receive a phone call from: Mom\nSunday paper I read most often: Hartford Courant\nFavorite TV show: Survivor\nFavorite Movie: Gladiator\nFavorite Actor: Tom Hanks\nFavorite Actress: Jessica Lange\nFavorite Music: Alternative\n"", 'Matt & Ana are the sixth team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their first confessional, Ana describes herself as ""high-strung and bossy"", but that it\'s those qualities Matt likes in her. Matt compliments his wife, saying she is ""smart"" and has ""a good head on her shoulders"", but almost never listens to his gut instincts.\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. They decide to head to the subway to find a phone. Using a payphone, they find that the flights are located at John F. Kennedy International Airport. Matt & Ana take the subway to get to JFK, where Ana panics, hoping they don't end up being the first team eliminated, though Matt attempts to console her on the matter.\n"", 'Matt & Ana ultimately take a slow form of transportation, ending up on the third flight into Johannesburg along with Kevin & Drew, Nancy & Emily, and Paul & Amie.\n', 'After landing in Johannesburg, they pick up the second clue informing them to sign up for a charter flight to Livingstone, Zambia, and get their next clue from a car waiting outside of Livingstone Airport. Matt & Ana once again find themselves on the final flight with Kevin & Drew, Nancy & Emily, and Paul & Amie.\n', ""Matt & Ana opt to drive themselves to their next destination at Knife's Edge Bridge at Victoria Falls. After easily getting to Victoria Falls and getting their next clue to Batoka Gorge, they attempt to find a local who can point them in the right direction, but the locals do not know where the gorge is, prompting Matt to make some derogatory remarks about them. After struggling to find the gorge, the married couple end up being the second-to-last team to arrive, and learn of the Detour. They choose to perform Air, which has teams zipline down the gorge, then bungee jump. After completing the task, and learning the Pit Stop is at Songwe Village. Both Matt & Ana and Kevin & Drew end up racing to get to the Pit Stop before the other. Despite Nancy & Emily getting lost on they way as well, Matt & Ana are the last team to arrive, and become the first team to be eliminated from The Amazing Race.\n"", ""Notes:\n\n1: There was an unaired Roadblock where one teammate had to cook a whole ostrich egg for the whole team to eat. While they weren't shown doing the roadblock in the DVD release, Matt later confirmed that he did the Roadblock.""]",['no profile info'],i d most like to receive a letter from nobody comfort item from home i ll miss the most my music hobby or activity i ll miss the most starting the softball season i d most like to receive a phone call from my sister sunday paper i read most often hartford courant favorite tv show that s show favorite movie fast times at ridgemont high favorite actor nicolas cage favorite actress meg ryan favorite music alternative i d most like to receive a letter from no one comfort item from home i ll miss the most heavy blanket hobby or activity i ll miss the most swimming i d most like to receive a phone call from mom sunday paper i read most often hartford courant favorite tv show survivor favorite movie gladiator favorite actor tom hanks favorite actress jessica lange favorite music alternative matt ana are the sixth team introduced at the starting line at the bethesda fountain in central park new york city in their first confessional ana describes herself as high strung and bossy but that it s those qualities matt likes in her matt compliments his wife saying she is smart and has a good head on her shoulders but almost never listens to his gut instincts,,i d most like to receive a letter from nobody comfort item from home i ll miss the most my music hobby or activity i ll miss the most starting the softball season i d most like to receive a phone call from my sister sunday paper i read most often hartford courant favorite tv show that s show favorite movie fast times at ridgemont high favorite actor nicolas cage favorite actress meg ryan favorite music alternative i d most like to receive a letter from no one comfort item from home i ll miss the most heavy blanket hobby or activity i ll miss the most swimming i d most like to receive a phone call from mom sunday paper i read most often hartford courant favorite tv show survivor favorite movie gladiator favorite actor tom hanks favorite actress jessica lange favorite music alternative matt ana are the sixth team introduced at the starting line at the bethesda fountain in central park new york city in their first confessional ana describes herself as high strung and bossy but that it s those qualities matt likes in her matt compliments his wife saying she is smart and has a good head on her shoulders but almost never listens to his gut instincts
1,0,0,"[""I'd most like to receive a letter from: Carole\nComfort item from home I'll miss the most: My bed\nHobby or activity I'll miss the most: Sitting on the back porch\nI'd most like to receive a phone call from: Will\nSunday paper I read most often: None\nFavorite TV show: Friends\nFavorite Movie: Legends of the Fall\nFavorite Actor: Brad Pitt\nFavorite Actress: Julia Roberts\nFavorite Music: Alternative\n"", ""I'd most like to receive a letter from: Martin and Julie, my brother and sister-in-law\nComfort item from home I'll miss the most: My bed\nHobby or activity I'll miss the most: Coaching\nI'd most like to receive a phone call from: Mom and Dad\nSunday paper I read most often: Houston Post\nFavorite TV show: Who Wants to Be a Millionaire\nFavorite Movie: Notting Hill\nFavorite Actor: Matt Damon\nFavorite Actress: Julia Roberts\nFavorite Music: Spanish\n"", 'Kim & Leslie are the third team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their confessional, Kim states that she and Leslie are ""mentally prepared to handle anything that comes along"", while Leslie states her major concern is dying over the course of the race.\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. The roommates decide to try to hail a cab in the busy New York City streets. They're unsuccessful with their first attempt, but find a cab that will take them to John F. Kennedy International Airport.\n"", 'Despite intending to head to Alitalia airlines, their cab driver takes them to where Swiss Air is located instead. This benefits the women, as they are on the second flight out to Johannesburg, with Dave & Margaretta on board as well, rather than on the later Alitalia flight.\n', 'After landing in Johannesburg, they pick up the second clue informing them to sign up for a charter flight to Livingstone, Zambia, and get their next clue from a car waiting outside of Livingstone Airport. Kim & Leslie end up on the third charter flight with Dave & Margaretta.\n', 'Kim & Leslie decide to drive themselves to Victoria Falls, rather than hire a driver. Kim & Leslie are the second team to get the clue at Victoria Falls, and decide against going for the Fast Forward, and instead go to Batoka Gorge. On their way to the gorge, they drop to 6th. Arriving at Batoka Gorge, they learn of the Detour. Kim & Leslie decide to perform Air, which will involve them ziplining down the gorge, then bungee jumping to receive their next clue.\n', ""Finishing the Detour, they are told to head to Songwe Village. At the village, Kim & Leslie find the clue for the Roadblock. The task has one member cook an ostrich egg for the whole team to eat. Kim performs the Roadblock for her team. After completing the Roadblock, they're told to head to the nearby Pit Stop to check in. Kim & Leslie finish the first leg in 6th.\n"", 'Kim & Leslie start Leg 2 at 5:33 am. Opening their clue, they are told to head to Songwe Museum. As they drive out of Songwe Village, they spot the museum. Grabbing their next clue, which informs them of the Detour. They choose Near and head to Mosi-oa-Tunya National Park to photograph animals.\n', 'After photographing three required animals, the roommates drive to Mukuni Village to exchange their photos for their next clue, a statue of the Eiffel Tower, indicating them must fly to Paris, France.\n', 'At Johannesburg International Airport, Kim & Leslie find a ticket counter, trying to find flights to Paris via Amsterdam, London, or Brussels. Amie walks up to the same counter the roommates are at, and after Kim orders four tickets, she informs the ticket attendant that Amie is not with them. This escalates as Kim, Leslie, and Amie begin to argue over the situation. The ticket attendant is able to diffuse the three women, and both teams are on the third flight to Paris.\n', 'Once they landed in Paris, as Paul & Amie wait in line for a cab, Kim & Leslie simply cut them and attempt to get into the taxi. This leads to another argument, but Kim & Leslie win as they take the cab to the Eiffel Tower. Ironically, Kim & Leslie arrive after Paul & Amie at the Eiffel Tower. Missing the clue box at the base of the tower, they head to the observation deck to look for a yellow and white flag.\n', 'After looking and asking around, a French gentleman informs Kim & Leslie that he saw a yellow and white flag at the base of the tower. Finding their next clue, it is revealed to be a Roadblock. Kim volunteers to perform the task, and heads up to the observation deck to search for something ""monumental"". In a five-way tie for last, Kim, Dave, Paul, Emily, and Karyn are all frantically searching for their next clue. Kim is the second-to-last to spot the flag on top of the Arc de Triomphe from the group, after Paul and Emily colluded together to show Lenny.\n', ""Back on the ground, she and Leslie get into a cab and head to the monument. They however make a mistake by getting out of their taxi across the street from the Arc de Triomphe, allowing the other trailing teams to check in before them. Once they've arrived at the Pit Stop, Phil informs them they are the last team to arrive and have been eliminated from The Amazing Race.\n"", 'Notes:\n\n1: There was an unaired Roadblock where one teammate had to cook a whole ostrich egg for the whole team to eat.\n\n']",['no profile info'],i d most like to receive a letter from carole comfort item from home i ll miss the most my bed hobby or activity i ll miss the most sitting on the back porch i d most like to receive a phone call from will sunday paper i read most often none favorite tv show friends favorite movie legends of the fall favorite actor brad pitt favorite actress julia roberts favorite music alternative i d most like to receive a letter from martin and julie my brother and sister in law comfort item from home i ll miss the most my bed hobby or activity i ll miss the most coaching i d most like to receive a phone call from mom and dad sunday paper i read most often houston post favorite tv show who wants to be a millionaire favorite movie notting hill favorite actor matt damon favorite actress julia roberts favorite music spanish kim leslie are the third team introduced at the starting line at the bethesda fountain in central park new york city in their confessional kim states that she and leslie are mentally prepared to handle anything that comes along while leslie states her major concern is dying over the course of the race,,i d most like to receive a letter from carole comfort item from home i ll miss the most my bed hobby or activity i ll miss the most sitting on the back porch i d most like to receive a phone call from will sunday paper i read most often none favorite tv show friends favorite movie legends of the fall favorite actor brad pitt favorite actress julia roberts favorite music alternative i d most like to receive a letter from martin and julie my brother and sister in law comfort item from home i ll miss the most my bed hobby or activity i ll miss the most coaching i d most like to receive a phone call from mom and dad sunday paper i read most often houston post favorite tv show who wants to be a millionaire favorite movie notting hill favorite actor matt damon favorite actress julia roberts favorite music spanish kim leslie are the third team introduced at the starting line at the bethesda fountain in central park new york city in their confessional kim states that she and leslie are mentally prepared to handle anything that comes along while leslie states her major concern is dying over the course of the race
2,0,0,"[""I'd most like to receive a letter from: My kids Caitlin and Michael\nComfort item from home I'll miss the most: Waterbed and hot tub\nHobby or activity I'll miss the most: Cuddling with my loved ones\nI'd most like to receive a phone call from: Caitlin and Michael or Charlie\nSunday paper I read most often: Delaware New Journal\nFavorite TV show: Weather Channel\nFavorite Movie: Casablanca\nFavorite Actor: Harrison Ford\nFavorite Actress: Susan Sarandon\nFavorite Music: Jimmy Buffet; Blues\n"", ""I'd most like to receive a letter from: My husband and kids\nComfort item from home I'll miss the most: My kids' hugs and kisses\nHobby or activity I'll miss the most: Working out at the gym\nI'd most like to receive a phone call from: My husband and kids\nSunday paper I read most often: News Journal\nFavorite TV show: Lonely Planet; Survivor\nFavorite Movie: Joe Versus the Volcano\nFavorite Actor: Kevin Spacey\nFavorite Actress: Meryl Streep\nFavorite Music: World; Blues\n"", 'Pat & Brenda are the eighth team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their confessional, Brenda says she hopes that she and Pat inspires other women with families to be themselves, while Pat states while their husbands weren\'t ""real crazy"" for them going around the world, that she and Brenda decided to apply for the show anyway.\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. The mothers decide to head toward Times Square to catch a taxi. They are able to make it on the first flight to South Africa, along with Frank & Margarita, Joe & Bill, Lenny & Karyn, and Rob & Brennan.\n"", 'After landing in Johannesburg, they pick up the second clue informing them to sign up for a charter flight to Livingstone, Zambia, and get their next clue from a car waiting outside of Livingstone Airport. Pat & Brenda end up on the second charter with Lenny & Karyn.\n', 'After they decide to hire a driver, they head to Victoria Falls, Pat & Brenda head to Batoka Gorge for their next clue, rather than perform the Fast Forward. At this point in the Race, they are currently in fifth.\n', 'Arriving at Batoka Gorge, they find the Detour. Despite Pat being scared, the mothers decide to do Air. After finishing the task, they receive their next clue informing them to head to the Pit Stop at Songwe Village. They finish the leg in 5th place.\n', 'Pat & Brenda start Leg 2 at 3:35 am. Opening their clue, they are told to head to Songwe Museum. They decide to drive from the village to find the museum. Despite asking for directions, they cannot locate the museum and choose to do the Fast Forward. Pat & Brenda head toward Bundu Adventures.\n', 'After arriving, the mothers have to whitewater raft down a part of the Zambezi River to get the Fast Forward pass. They complete the task, and earn the Fast Forward, informing them to head to the Arc de Triomphe in Paris, France. Both are excited to visit Paris.\n', 'Despite Brenda having a breakdown while in Johannesburg International Airport, they are able to get one of the first two flights to Paris. Once they arrive at Charles de Gaulle Airport in Paris, they head to the Pit Stop at the Arc de Triomphe, while the other teams head to the Eiffel Tower. After touching on the Pit Stop, they finish the leg in 1st.\n', 'Pat & Brenda started the third leg at 9:06 pm. Opening the first clue for the leg, they are told to head to La Grande Roue. After asking a local Parisian, who points out the Ferris wheel is further down the street, the mothers decide to take the subway to get there. After arriving to the site of the attraction, Pat & Brenda find the clue for the Detour. They choose Easy Walk, where they have to find Foucault pendulum and get their next clue from a cat statue. Not knowing that there are two Foucault pendulums in Paris, they head unintentionally to the incorrect location at Musée des Arts et Métiers to wait until the location opens. They look around the grounds before the location opens, seeing the pendulum, but no cat statue.\n', 'The following morning, at 10:00 am, Pat & Brenda enter the museum. They search for a cat statue, but are still unable to find it. A curator, who overheard the mothers, informs them that they are likely looking for the pendulum at the Panthéon. They exit the museum to get to the correct location.\n', ""After getting the clue at the Panthéon, they are informed to head to Hôtel de Ville, where they will need to find a person in a blue suit, who'll give them their next clue. Pat & Brenda decide to take the subway to get there.\n"", ""Once they get their clue from the man at Hôtel de Ville, they find the Roadblock. Pat volunteers to perform the task. For the Roadblock, Pat will have to wear a sanitary suit and walk through the sewers to find their clue near Place du Châtelet. After finishing the Roadblock, they learn they have to make their way to the Pit Stop at Château des Baux in Les Baux de Provence by train. Despite their hopes, however, their error with the location of the pendulum caused too big of a gap between them and the other teams. Phil informs Pat & Brenda once they've reached the mat, they've been eliminated from The Amazing Race.\n"", ""Notes: \n\n1: There was an unaired Roadblock where one teammate had to cook a whole ostrich egg for the whole team to eat. While they weren't shown doing the roadblock in the DVD release, Pat confirmed that she did the roadblock in a later interview.""]",['no profile info'],i d most like to receive a letter from my kids caitlin and michael comfort item from home i ll miss the most waterbed and hot tub hobby or activity i ll miss the most cuddling with my loved ones i d most like to receive a phone call from caitlin and michael or charlie sunday paper i read most often delaware new journal favorite tv show weather channel favorite movie casablanca favorite actor harrison ford favorite actress susan sarandon favorite music jimmy buffet blues i d most like to receive a letter from my husband and kids comfort item from home i ll miss the most my kids hugs and kisses hobby or activity i ll miss the most working out at the gym i d most like to receive a phone call from my husband and kids sunday paper i read most often news journal favorite tv show lonely planet survivor favorite movie joe versus the volcano favorite actor kevin spacey favorite actress meryl streep favorite music world blues pat brenda are the eighth team introduced at the starting line at the bethesda fountain in central park new york city in their confessional brenda says she hopes that she and pat inspires other women with families to be themselves while pat states while their husbands weren t real crazy for them going around the world that she and brenda decided to apply for the show anyway,,i d most like to receive a letter from my kids caitlin and michael comfort item from home i ll miss the most waterbed and hot tub hobby or activity i ll miss the most cuddling with my loved ones i d most like to receive a phone call from caitlin and michael or charlie sunday paper i read most often delaware new journal favorite tv show weather channel favorite movie casablanca favorite actor harrison ford favorite actress susan sarandon favorite music jimmy buffet blues i d most like to receive a letter from my husband and kids comfort item from home i ll miss the most my kids hugs and kisses hobby or activity i ll miss the most working out at the gym i d most like to receive a phone call from my husband and kids sunday paper i read most often news journal favorite tv show lonely planet survivor favorite movie joe versus the volcano favorite actor kevin spacey favorite actress meryl streep favorite music world blues pat brenda are the eighth team introduced at the starting line at the bethesda fountain in central park new york city in their confessional brenda says she hopes that she and pat inspires other women with families to be themselves while pat states while their husbands weren t real crazy for them going around the world that she and brenda decided to apply for the show anyway
3,0,0,"[""I'd most like to receive a letter from: My kids\nComfort item from home I'll miss the most: Nothing\nHobby or activity I'll miss the most: Flying\nI'd most like to receive a phone call from: NASA, with a space slot\nSunday paper I read most often: Dallas Morning News\nFavorite TV show: History Channel\nFavorite Movie: Flying Tigers\nFavorite Actor: Alan Ladd\nFavorite Actress: Meg Ryan\nFavorite Music: Country & Western\n"", ""I'd most like to receive a letter from: President Bush\nComfort item from home I'll miss the most: Total bathroom facility\nHobby or activity I'll miss the most: Flying (as a pilot)\nI'd most like to receive a phone call from: Our grandbabies\nSunday paper I read most often: Dallas Morning News\nFavorite TV show: Ally McBeal\nFavorite Movie: Auntie Mame\nFavorite Actor: Mel Gibson\nFavorite Actress: Michelle Pfeiffer\nFavorite Music: 60s and 70s sounds\n"", 'Dave & Margaretta are the fifth team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their confessional, Margaretta remarks her husband can be a bit bossy, and she doesn\'t take orders very well. Dave says that his wife is an ""amazing lady"" and ""she can do anything she sets her mind to.""\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. Despite the married couple departing last from the Starting Line, they are still able to get on the second flight with Kim & Leslie.\n"", 'After landing in Johannesburg, they pick up the second clue informing them to sign up for a charter flight to Livingstone, Zambia, and get their next clue from a car waiting outside of <a href=""http://en.wikipedia.org/wiki/Livingstone_Airport"" class=""extiw"" titl']",['no profile info'],i d most like to receive a letter from my kids comfort item from home i ll miss the most nothing hobby or activity i ll miss the most flying i d most like to receive a phone call from nasa with a space slot sunday paper i read most often dallas morning news favorite tv show history channel favorite movie flying tigers favorite actor alan ladd favorite actress meg ryan favorite music country western i d most like to receive a letter from president bush comfort item from home i ll miss the most total bathroom facility hobby or activity i ll miss the most flying as a pilot i d most like to receive a phone call from our grandbabies sunday paper i read most often dallas morning news favorite tv show ally mcbeal favorite movie auntie mame favorite actor mel gibson favorite actress michelle pfeiffer favorite music s and s sounds dave margaretta are the fifth team introduced at the starting line at the bethesda fountain in central park new york city in their confessional margaretta remarks her husband can be a bit bossy and she doesn t take orders very well dave says that his wife is an amazing lady and she can do anything she sets her mind to,,i d most like to receive a letter from my kids comfort item from home i ll miss the most nothing hobby or activity i ll miss the most flying i d most like to receive a phone call from nasa with a space slot sunday paper i read most often dallas morning news favorite tv show history channel favorite movie flying tigers favorite actor alan ladd favorite actress meg ryan favorite music country western i d most like to receive a letter from president bush comfort item from home i ll miss the most total bathroom facility hobby or activity i ll miss the most flying as a pilot i d most like to receive a phone call from our grandbabies sunday paper i read most often dallas morning news favorite tv show ally mcbeal favorite movie auntie mame favorite actor mel gibson favorite actress michelle pfeiffer favorite music s and s sounds dave margaretta are the fifth team introduced at the starting line at the bethesda fountain in central park new york city in their confessional margaretta remarks her husband can be a bit bossy and she doesn t take orders very well dave says that his wife is an amazing lady and she can do anything she sets her mind to
4,0,0,"[""I'd most like to receive a letter from: Brothers\nComfort item from home I'll miss the most: Pillows\nHobby or activity I'll miss the most: Ice hockey; gym\nI'd most like to receive a phone call from: Mother\nSunday paper I read most often: New York Daily News\nFavorite TV show: Friends\nFavorite Movie: The Godfather\nFavorite Actor: Robert De Niro\nFavorite Actress: Meg Ryan\nFavorite Music: Rock; Alternative\n"", ""I'd most like to receive a letter from: Parents; grandmom and pop\nComfort item from home I'll miss the most: Music; pillows; massager\nHobby or activity I'll miss the most: Rollerblading; dance class; emails\nI'd most like to receive a phone call from: My sister Deana\nSunday paper I read most often: Los Angeles Times\nFavorite TV show: Friends\nFavorite Movie: Good Will Hunting\nFavorite Actor: Robert Downey Jr.\nFavorite Actress: Julia Roberts\nFavorite Music: R&B\n"", 'Paul & Amie are the second team introduced at the Starting Line at the Bethesda Fountain in Central Park, New York City. In their confessional, they remark that the race will have an impact on their relationship.\n', ""After Phil Keoghan officially starts the first leg of the race, they learn they have to fly to Johannesburg, South Africa, where they'll find their next clue in Johannesburg International Airport, and that there are three flights to Johannesburg from New York City, but are not informed from which airport they're departing at. Using a payphone, Paul gets information that the flights are departing from John F. Kennedy International Airport. They argue a bit over whether to take a train or cab to the airport, they take a train, however they get off at the wrong station. This mistake costs them, as they are on the third flight to Johannesburg with Kevin & Drew, Matt & Ana, and Nancy & Emily.\n"", 'After landing in Johannesburg, they pick up the second clue informing them t']",['no profile info'],i d most like to receive a letter from brothers comfort item from home i ll miss the most pillows hobby or activity i ll miss the most ice hockey gym i d most like to receive a phone call from mother sunday paper i read most often new york daily news favorite tv show friends favorite movie the godfather favorite actor robert de niro favorite actress meg ryan favorite music rock alternative i d most like to receive a letter from parents grandmom and pop comfort item from home i ll miss the most music pillows massager hobby or activity i ll miss the most rollerblading dance class emails i d most like to receive a phone call from my sister deana sunday paper i read most often los angeles times favorite tv show friends favorite movie good will hunting favorite actor robert downey jr favorite actress julia roberts favorite music r b paul amie are the second team introduced at the starting line at the bethesda fountain in central park new york city in their confessional they remark that the race will have an impact on their relationship,,i d most like to receive a letter from brothers comfort item from home i ll miss the most pillows hobby or activity i ll miss the most ice hockey gym i d most like to receive a phone call from mother sunday paper i read most often new york daily news favorite tv show friends favorite movie the godfather favorite actor robert de niro favorite actress meg ryan favorite music rock alternative i d most like to receive a letter from parents grandmom and pop comfort item from home i ll miss the most music pillows massager hobby or activity i ll miss the most rollerblading dance class emails i d most like to receive a phone call from my sister deana sunday paper i read most often los angeles times favorite tv show friends favorite movie good will hunting favorite actor robert downey jr favorite actress julia roberts favorite music r b paul amie are the second team introduced at the starting line at the bethesda fountain in central park new york city in their confessional they remark that the race will have an impact on their relationship
...,...,...,...,...,...,...,...
308,0,0,[],['no profile info'],,,
309,0,0,['no profile info'],['no profile info'],,,
310,1,0,"['Notes:\n\n1: Leg 7 featured two Roadblocks. The team member who sat out the first Roadblock was required to perform the second one.\n\n2: Rob & Corey used their Express Pass to bypass the Roadblock on Leg 10. Before using the Express Pass, Rob elected to perform the Roadblock, and this is reflected in the total Roadblock count.']",['no profile info'],,,
311,1,0,['Notes:\n\n1: Leg 7 featured two Roadblocks. The team member who sat out the first Roadblock was required to perform the second one.\n'],['no profile info'],,,


In [152]:
z = profile_df[profile_df['profile'] != '']
print("Number of profiles with text: " + str(len(z)))
print("-------------- Winner breakout for teams with profile txt----------------")
print(z['Y'].value_counts())

print("-------------- Top 3 breakout for teams with profile txt----------------")
print(z['Top_3'].value_counts())

Number of profiles with text: 200
-------------- Winner breakout for teams with profile txt----------------
0    180
1     20
Name: Y, dtype: int64
-------------- Top 3 breakout for teams with profile txt----------------
0    145
1     55
Name: Top_3, dtype: int64


In [153]:
pd.set_option('display.max_colwidth', None)
z.iloc[0:50,-1]              

0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       

# BERT

In [155]:
from transformers import pipeline

# Load the question-answering pipeline
qa_pipeline = pipeline("question-answering")


ModuleNotFoundError: No module named 'transformers'