In [1]:
import pandas as pd
import numpy as np

In [2]:
# read in raw data for cars in movies
raw_2010 = pd.read_csv("output_data/raw/cars in movies/2021_03_06_2010Cars.csv")
raw_2010

In [3]:
# how many null elements are in each column
raw_2010.isnull().sum()

In [4]:
# what sort of junk data do we need to remove from the Car column
raw_2010.loc[raw_2010.Car.str.contains('Movie')]

In [5]:
raw_2010.loc[raw_2010.Car.str.contains('Unknown')]

Unnamed: 0,Release,Year,Car,url,Stars
144,Harry Potter and the Deathly Hallows: Part 1,2010,Unknown,vehicle_455543.html,1
622,The Book of Eli,2010,Unknown,vehicle_332601.html,1
1063,The Bounty Hunter,2010,Unknown,vehicle_293920.html,1
2052,Daybreakers,2010,Unknown,vehicle_236773.html,2


In [6]:
raw_2010.loc[raw_2010.Car.str.contains('unknown')]

Unnamed: 0,Release,Year,Car,url,Stars
46,The Twilight Saga: Eclipse,2010,unknown,vehicle_359181.html,1
106,Inception,2010,unknown,vehicle_355636.html,1
117,Inception,2010,Yamaha unknown,vehicle_1203997-Yamaha.html,1
163,The Karate Kid,2010,Huanghai unknown,vehicle_331629-Huanghai.html,2
171,The Karate Kid,2010,unknown,vehicle_372472.html,1
296,The Other Guys,2010,2005 UD unknown,vehicle_1183239-UD-2005.html,1
303,The Other Guys,2010,Yale unknown,vehicle_1183242-Yale.html,1
443,Jackass 3D,2010,unknown,vehicle_1306807.html,1
444,Jackass 3D,2010,unknown,vehicle_383686.html,3
623,The Book of Eli,2010,unknown,vehicle_310513.html,1


In [7]:
# how common are some cars at the most frequent versus least
raw_2010.Car.value_counts()

1999 Ford Crown Victoria       22
2001 Ford Crown Victoria       17
unknown                        15
2003 Ford Crown Victoria       13
2006 Chevrolet Impala           9
                               ..
2000 Hyundai Accent             1
2003 Mercury Grand Marquis      1
2005 Toyota Yaris               1
2004 Škoda Octavia              1
1982 Ford F-350                 1
Name: Car, Length: 1472, dtype: int64

In [8]:
# top 20 most movie appearances
raw_2010.Car.value_counts().nlargest(20)

1999 Ford Crown Victoria                      22
2001 Ford Crown Victoria                      17
unknown                                       15
2003 Ford Crown Victoria                      13
2006 Chevrolet Impala                          9
1997 Ford Econoline                            9
2006 Toyota Prius II                           9
2005 Chrysler 300                              8
2007 Chevrolet Suburban                        8
2003 Lincoln Town Car                          8
2000 Chevrolet Impala                          8
2008 Chevrolet Malibu                          7
2006 Ford Crown Victoria                       7
1998 Ford Crown Victoria                       7
2007 Cadillac Escalade                         6
2007 Chevrolet Tahoe                           6
2003 Lincoln Town Car Stretched Limousine      6
2006 Dodge Charger                             6
1992 Ford Econoline                            6
1997 Toyota Camry                              6
Name: Car, dtype: in

In [9]:
# check data types
raw_2010.dtypes

Release    object
Year        int64
Car        object
url        object
Stars      object
dtype: object

In [10]:
# how many cars are there for each ratings category
raw_2010.Stars.value_counts()

1      1372
2       443
3       185
Nan      32
4        21
Name: Stars, dtype: int64

In [11]:
# return a DataFrame without any missing ratings element
clean_2010 = raw_2010.loc[raw_2010.Stars != 'Nan']

In [12]:
# convert Stars ratings column to integers
clean_2010.Stars = clean_2010.Stars.astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [13]:
# return a new DataFrame that removes irrelevant car elements
new = clean_2010.loc[(~clean_2010.Car.str.contains('Unknown'))\
                     & (~clean_2010.Car.str.contains('unknown')) & (~clean_2010.Car.str.contains('Movie'))]

In [14]:
# reset the index and display
new = new.reset_index(drop=True)
new

Unnamed: 0,Release,Year,Car,url,Stars
0,Toy Story 3,2010,1998 Chevrolet Corvette C5,vehicle_369776-Chevrolet-Corvette-1998.html,3
1,Toy Story 3,2010,Opel Meriva [A],vehicle_974788-Opel-Meriva.html,2
2,Toy Story 3,2010,2005 Opel Zafira [B],vehicle_974791-Opel-Zafira-2005.html,2
3,Toy Story 3,2010,1979 Toyota Truck,vehicle_349573-Toyota-Truck-RN30-1979.html,2
4,Iron Man 2,2010,2010 Acura ZDX,vehicle_340464-Acura-ZDX-YB1-2010.html,2
...,...,...,...,...,...
1982,Daybreakers,2010,Rhino Buggies Hammer,vehicle_236776-Rhino-Buggies-Hammer.html,2
1983,Daybreakers,2010,Rhino Buggies Hammer,vehicle_317794-Rhino-Buggies-Hammer.html,3
1984,Daybreakers,2010,2004 smart Fortwo,vehicle_317791-smart-Fortwo-450-2004.html,1
1985,Daybreakers,2010,1993 Toyota 4Runner,vehicle_236774-Toyota-4Runner-N130-1993.html,2


In [15]:
# how many unique vehicles are there
len(new.Car.unique())

1444

In [16]:
# in order to start working with the Car column, remove whitespaces at both sides of the string elements
for index, row in new.iterrows():
    new.Car[index] = new.Car[index].strip()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new.Car[index] = new.Car[index].strip()


In [17]:
# return a new DataFrame of all duplicate Car rows
dups = new.loc[new.duplicated('Car', keep=False)]

In [18]:
# sort duplicates by Car column and reset index
# a significant portion of the dataset contain cars that appear in multiple films
dups = dups.sort_values("Car")
dups = dups.reset_index(drop=True)
dups

Unnamed: 0,Release,Year,Car,url,Stars
0,Crazy Heart,2010,1964 Chevrolet C-Series,vehicle_356411-Chevrolet-C-Series-1964.html,1
1,Burlesque,2010,1964 Chevrolet C-Series,vehicle_562475-Chevrolet-C-Series-1964.html,1
2,Secretariat,2010,1965 Ford Mustang,vehicle_371640-Ford-Mustang-1965.html,1
3,Secretariat,2010,1965 Ford Mustang,vehicle_371645-Ford-Mustang-1965.html,1
4,Daybreakers,2010,1966 Ford Mustang,vehicle_306723-Ford-Mustang-1966.html,1
...,...,...,...,...,...
821,The Social Network,2010,Volvo S40,vehicle_346265-Volvo-S40.html,1
822,The American,2010,Volvo V70,vehicle_335773-Volvo-V70.html,2
823,Morning Glory,2010,Volvo V70,vehicle_1278240-Volvo-V70.html,1
824,A Nightmare on Elm Street,2010,Volvo V70,vehicle_512215-Volvo-V70.html,2


In [19]:
# sample car and its appearances
dups.loc[dups.Car == '2000 Ford Crown Victoria']

Unnamed: 0,Release,Year,Car,url,Stars
297,Morning Glory,2010,2000 Ford Crown Victoria,vehicle_380889-Ford-Crown-Victoria-P71-2000.html,2
298,Vampires Suck,2010,2000 Ford Crown Victoria,vehicle_357390-Ford-Crown-Victoria-P71-2000.html,3
299,The Bounty Hunter,2010,2000 Ford Crown Victoria,vehicle_293923-Ford-Crown-Victoria-P71-2000.html,2
300,The Other Guys,2010,2000 Ford Crown Victoria,vehicle_346999-Ford-Crown-Victoria-P71-2000.html,3
301,TRON: Legacy,2010,2000 Ford Crown Victoria,vehicle_389013-Ford-Crown-Victoria-P71-2000.html,2


In [20]:
# read in car sales data to use as a filter for the Car column
sales = pd.read_csv('output_data/Car_Sales_2009-2020_edited.csv')
sales

Unnamed: 0,Vehicle,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Acura ILX,,,,12251.0,20430.0,17854.0,18531.0,14597.0,11757.0,11273.0,14685.0,13414.0
1,Acura MDX,31178.0,47210.0,43271.0,50854.0,53040.0,65603.0,58208.0,55495.0,54886.0,51512.0,52019.0,47816.0
2,Acura NSX,,,,,,,,269.0,581.0,170.0,238.0,128.0
3,Acura RDX,10153.0,14975.0,15196.0,29520.0,44750.0,44865.0,51026.0,52361.0,51295.0,63580.0,62876.0,52785.0
4,Acura RL,2043.0,2037.0,1096.0,379.0,39.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
452,Volvo V70,1816.0,895.0,,,,,,,,,,
453,Volvo XC40,,,,,,,,,,12420.0,17647.0,23778.0
454,Volvo XC60,9262.0,12030.0,12932.0,19139.0,19766.0,19276.0,26134.0,20452.0,22516.0,32689.0,30578.0,32078.0
455,Volvo XC70,5825.0,6626.0,5716.0,5513.0,4882.0,5093.0,5118.0,5425.0,,,,


In [21]:
# list comprehension to apply lower method to string elements in the Car column...
# ...to better filter the data for vehicles in the car sales dataset
lower_cars = sales.copy()
lower_cars.Vehicle = [car.lower() for car in lower_cars.Vehicle]

In [22]:
# same operations for the car in movie dataset
lower_car_mov = new.copy()
lower_car_mov.Car = [car.lower() for car in lower_car_mov.Car]

In [23]:
# create list of cars with sales data
vehicles = lower_cars.Vehicle.unique().tolist()

In [24]:
# use list comprehension and for loop to create a list of brands from the sales dataset
brand_raw = [vehicle.split()[0] for vehicle in vehicles]
brands = []
for brand in brand_raw:
    if brand not in brands:
        brands.append(brand)

In [25]:
print(brands)

['acura', 'alfa', 'audi', 'bmw', 'bentley', 'buick', 'cadillac', 'chevrolet', 'chrysler', 'dodge', 'fiat', 'ford', 'gmc', 'genesis', 'honda', 'hummer', 'hyundai', 'infiniti', 'jaguar', 'jeep', 'kia', 'land', 'lexus', 'lincoln', 'maserati', 'mazda', 'mercedes-benz', 'mercury', 'mini', 'mitsubishi', 'nissan', 'pontiac', 'porsche', 'ram', 'saab', 'saturn', 'scion', 'smart', 'subaru', 'suzuki', 'tesla', 'toyota', 'volkswagen', 'volvo']


In [26]:
# use list comprehension and for loop to create a list of models from the sales dataset
model_raw = [vehicle.split()[-1] for vehicle in vehicles]
models = []
for model in model_raw:
    if model not in models:
        models.append(model)

In [27]:
print(models)

['ilx', 'mdx', 'nsx', 'rdx', 'rl', 'rlx', 'tl', 'tlx', 'tsx', 'zdx', '4c', 'giulia', 'stelvio', 'a3', 'e-tron', 'a4', 'allroad', 'avant', 'sedan', 'a5', 'a6', 'a7', 'a8', 'q3', 'q5', 'q7', 'q8', 'r8', 'tt', 'etron', '1-series', '2-series', '3-series', '4-series', '5-series', '6-series', '7-series', '8-series', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'z4', 'i3', 'i8', 'bentayga', 'gt', 'spur', 'mulsanne', 'cascada', 'enclave', 'encore', 'gx', 'envision', 'lacrosse', 'lucerne', 'regal', 'verano', 'ats', 'ct4', 'ct5', 'ct6', 'cts', 'dts', 'elr', 'escalade', 'srx', 'sts', 'xlr', 'xt4', 'xt5', 'xt6', 'xts', 'avalanche', 'aveo', 'blazer', 'bolt', 'camaro', 'ppv', 'sport', 'express', 'cobalt', 'colorado', 'corvette', 'cruze', 'equinox', 'hhr', 'impala', 'malibu', 'ss', 'silverado', 'sonic', 'spark', 'suburban', 'tahoe', 'trailblazer', 'traverse', 'trax', 'uplander', 'volt', '200', '300', 'aspen', 'cruiser', 'pacifica', 'sebring', 'country', 'avenger', 'caliber', 'challenger', 'charger', 'da

In [28]:
# handle models that share a row of data by splitting and appending to models list, then removing
remove_models = []

for model in models:
    if '/' in model:
        print (f'Splitting and Extending {model}')
        models.extend(model.split('/'))
        remove_models.append(model)
        
for model in remove_models:
    print (f'Removing {model}')
    models.remove(model)

Splitting and Extending q70/m
Splitting and Extending ex/qx50
Splitting and Extending qx60/jx
Splitting and Extending qx70/fx
Splitting and Extending qx80/qx56
Splitting and Extending es/es350
Splitting and Extending gx/gx460
Splitting and Extending hs/hs250h
Splitting and Extending lx/lx570
Splitting and Extending mkx/nautilus
Splitting and Extending gl-class/gls-class
Splitting and Extending gle-class/m-class
Splitting and Extending metris/vito
Splitting and Extending slk-class/slc-class
Splitting and Extending eclipse/spyder
Splitting and Extending 370z/350z
Splitting and Extending crosstrek/crosstrek
Splitting and Extending forenza/reno
Splitting and Extending sx4/aerio
Splitting and Extending corolla/matrix
Splitting and Extending s60/v60
Splitting and Extending s90/v90
Removing q70/m
Removing ex/qx50
Removing qx60/jx
Removing qx70/fx
Removing qx80/qx56
Removing es/es350
Removing gx/gx460
Removing hs/hs250h
Removing lx/lx570
Removing mkx/nautilus
Removing gl-class/gls-class
Removi

In [29]:
print(models)

['ilx', 'mdx', 'nsx', 'rdx', 'rl', 'rlx', 'tl', 'tlx', 'tsx', 'zdx', '4c', 'giulia', 'stelvio', 'a3', 'e-tron', 'a4', 'allroad', 'avant', 'sedan', 'a5', 'a6', 'a7', 'a8', 'q3', 'q5', 'q7', 'q8', 'r8', 'tt', 'etron', '1-series', '2-series', '3-series', '4-series', '5-series', '6-series', '7-series', '8-series', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'z4', 'i3', 'i8', 'bentayga', 'gt', 'spur', 'mulsanne', 'cascada', 'enclave', 'encore', 'gx', 'envision', 'lacrosse', 'lucerne', 'regal', 'verano', 'ats', 'ct4', 'ct5', 'ct6', 'cts', 'dts', 'elr', 'escalade', 'srx', 'sts', 'xlr', 'xt4', 'xt5', 'xt6', 'xts', 'avalanche', 'aveo', 'blazer', 'bolt', 'camaro', 'ppv', 'sport', 'express', 'cobalt', 'colorado', 'corvette', 'cruze', 'equinox', 'hhr', 'impala', 'malibu', 'ss', 'silverado', 'sonic', 'spark', 'suburban', 'tahoe', 'trailblazer', 'traverse', 'trax', 'uplander', 'volt', '200', '300', 'aspen', 'cruiser', 'pacifica', 'sebring', 'country', 'avenger', 'caliber', 'challenger', 'charger', 'da

In [30]:
# join the list of brands with a | delimiter, creating a RegEx pattern to pass through str.contains() next
brand_var = "|".join(brands)
print(brand_var)

acura|alfa|audi|bmw|bentley|buick|cadillac|chevrolet|chrysler|dodge|fiat|ford|gmc|genesis|honda|hummer|hyundai|infiniti|jaguar|jeep|kia|land|lexus|lincoln|maserati|mazda|mercedes-benz|mercury|mini|mitsubishi|nissan|pontiac|porsche|ram|saab|saturn|scion|smart|subaru|suzuki|tesla|toyota|volkswagen|volvo


In [31]:
# same process for models
model_var = "|".join(models)
print(model_var)

ilx|mdx|nsx|rdx|rl|rlx|tl|tlx|tsx|zdx|4c|giulia|stelvio|a3|e-tron|a4|allroad|avant|sedan|a5|a6|a7|a8|q3|q5|q7|q8|r8|tt|etron|1-series|2-series|3-series|4-series|5-series|6-series|7-series|8-series|x1|x2|x3|x4|x5|x6|x7|z4|i3|i8|bentayga|gt|spur|mulsanne|cascada|enclave|encore|gx|envision|lacrosse|lucerne|regal|verano|ats|ct4|ct5|ct6|cts|dts|elr|escalade|srx|sts|xlr|xt4|xt5|xt6|xts|avalanche|aveo|blazer|bolt|camaro|ppv|sport|express|cobalt|colorado|corvette|cruze|equinox|hhr|impala|malibu|ss|silverado|sonic|spark|suburban|tahoe|trailblazer|traverse|trax|uplander|volt|200|300|aspen|cruiser|pacifica|sebring|country|avenger|caliber|challenger|charger|dakota|dart|durango|caravan|journey|nitro|ram|viper|spider|500|500l|500x|c-max|victoria|e-series|ecosport|edge|escape|expedition|explorer|interceptor|f-series|fiesta|flex|focus|fusion|mustang|ranger|taurus|x|transit|connect|acadia|canyon|envoy|savana|sierra|terrain|torrent|yukon|xl|g70|g80|g90|gv80|accord|cr-v|cr-z|civic|fcv|crosstour|element|f

In [32]:
# create DataFrame where only the cars from our car sales data are included
cars_filtered = lower_car_mov.loc[lower_car_mov.Car.str.contains(brand_var)]
cars_filtered

Unnamed: 0,Release,Year,Car,url,Stars
0,Toy Story 3,2010,1998 chevrolet corvette c5,vehicle_369776-Chevrolet-Corvette-1998.html,3
3,Toy Story 3,2010,1979 toyota truck,vehicle_349573-Toyota-Truck-RN30-1979.html,2
4,Iron Man 2,2010,2010 acura zdx,vehicle_340464-Acura-ZDX-YB1-2010.html,2
6,Iron Man 2,2010,2008 audi a8 l d3,vehicle_299564-Audi-A8-L-Typ-4E-2008.html,3
7,Iron Man 2,2010,2010 audi r8 spyder,vehicle_239415-Audi-R8-Spyder-Typ-42-2010.html,3
...,...,...,...,...,...
1980,Daybreakers,2010,1997 mitsubishi diamante,vehicle_318489-Mitsubishi-Diamante-F38A-1997.html,1
1981,Daybreakers,2010,1978 pontiac firebird trans am,vehicle_223185-Pontiac-Firebird-Trans-Am-1978....,3
1984,Daybreakers,2010,2004 smart fortwo,vehicle_317791-smart-Fortwo-450-2004.html,1
1985,Daybreakers,2010,1993 toyota 4runner,vehicle_236774-Toyota-4Runner-N130-1993.html,2


In [33]:
cars_filtered.loc[cars_filtered.Car.str.contains(model_var)]

Unnamed: 0,Release,Year,Car,url,Stars
0,Toy Story 3,2010,1998 chevrolet corvette c5,vehicle_369776-Chevrolet-Corvette-1998.html,3
3,Toy Story 3,2010,1979 toyota truck,vehicle_349573-Toyota-Truck-RN30-1979.html,2
4,Iron Man 2,2010,2010 acura zdx,vehicle_340464-Acura-ZDX-YB1-2010.html,2
6,Iron Man 2,2010,2008 audi a8 l d3,vehicle_299564-Audi-A8-L-Typ-4E-2008.html,3
7,Iron Man 2,2010,2010 audi r8 spyder,vehicle_239415-Audi-R8-Spyder-Typ-42-2010.html,3
...,...,...,...,...,...
1980,Daybreakers,2010,1997 mitsubishi diamante,vehicle_318489-Mitsubishi-Diamante-F38A-1997.html,1
1981,Daybreakers,2010,1978 pontiac firebird trans am,vehicle_223185-Pontiac-Firebird-Trans-Am-1978....,3
1984,Daybreakers,2010,2004 smart fortwo,vehicle_317791-smart-Fortwo-450-2004.html,1
1985,Daybreakers,2010,1993 toyota 4runner,vehicle_236774-Toyota-4Runner-N130-1993.html,2


In [34]:
lower_car_mov.loc[lower_car_mov.Car.str.contains(brand_var) & lower_car_mov.Car.str.contains(model_var)]

Unnamed: 0,Release,Year,Car,url,Stars
0,Toy Story 3,2010,1998 chevrolet corvette c5,vehicle_369776-Chevrolet-Corvette-1998.html,3
3,Toy Story 3,2010,1979 toyota truck,vehicle_349573-Toyota-Truck-RN30-1979.html,2
4,Iron Man 2,2010,2010 acura zdx,vehicle_340464-Acura-ZDX-YB1-2010.html,2
6,Iron Man 2,2010,2008 audi a8 l d3,vehicle_299564-Audi-A8-L-Typ-4E-2008.html,3
7,Iron Man 2,2010,2010 audi r8 spyder,vehicle_239415-Audi-R8-Spyder-Typ-42-2010.html,3
...,...,...,...,...,...
1980,Daybreakers,2010,1997 mitsubishi diamante,vehicle_318489-Mitsubishi-Diamante-F38A-1997.html,1
1981,Daybreakers,2010,1978 pontiac firebird trans am,vehicle_223185-Pontiac-Firebird-Trans-Am-1978....,3
1984,Daybreakers,2010,2004 smart fortwo,vehicle_317791-smart-Fortwo-450-2004.html,1
1985,Daybreakers,2010,1993 toyota 4runner,vehicle_236774-Toyota-4Runner-N130-1993.html,2


In [35]:
# read in film data
movies = pd.read_csv('output_data/Top100_Movies_2010-2021.csv')

In [36]:
# create a DataFrame of only films from the top 2010 grossing films
df10 = movies.loc[movies.Year == 2010]
df10

Unnamed: 0,Rank,Release,Gross,Theaters,Total Gross,Release Date,Distributor,Year
0,1,Avatar,"$466,141,929",3461,"$749,766,139",Dec 18,Twentieth Century Fox,2010
1,2,Toy Story 3,"$415,004,880",4028,"$415,004,880",Jun 18,Walt Disney Studios Motion Pictures,2010
2,3,Alice in Wonderland,"$334,191,110",3739,"$334,191,110",Mar 5,Walt Disney Studios Motion Pictures,2010
3,4,Iron Man 2,"$312,433,331",4390,"$312,433,331",May 7,Paramount Pictures,2010
4,5,The Twilight Saga: Eclipse,"$300,531,751",4468,"$300,531,751",Jun 30,Summit Entertainment,2010
...,...,...,...,...,...,...,...,...
95,96,Scott Pilgrim vs. the World,"$31,524,275",2820,"$31,524,275",Aug 13,Universal Pictures,2010
96,97,Charlie St. Cloud,"$31,162,545",2725,"$31,162,545",Jul 30,Universal Pictures,2010
97,98,Love & Other Drugs,"$31,099,481",2458,"$32,367,005",Nov 24,Twentieth Century Fox,2010
98,99,Morning Glory,"$30,727,814",2544,"$31,011,732",Nov 10,Paramount Pictures,2010


In [37]:
# create a DataFrame that merges the car and movie data
car_mov = pd.merge(cars_filtered, df10, how="left", on="Release")
car_mov

Unnamed: 0,Release,Year_x,Car,url,Stars,Rank,Gross,Theaters,Total Gross,Release Date,Distributor,Year_y
0,Toy Story 3,2010,1998 chevrolet corvette c5,vehicle_369776-Chevrolet-Corvette-1998.html,3,2,"$415,004,880",4028,"$415,004,880",Jun 18,Walt Disney Studios Motion Pictures,2010
1,Toy Story 3,2010,1979 toyota truck,vehicle_349573-Toyota-Truck-RN30-1979.html,2,2,"$415,004,880",4028,"$415,004,880",Jun 18,Walt Disney Studios Motion Pictures,2010
2,Iron Man 2,2010,2010 acura zdx,vehicle_340464-Acura-ZDX-YB1-2010.html,2,4,"$312,433,331",4390,"$312,433,331",May 7,Paramount Pictures,2010
3,Iron Man 2,2010,2008 audi a8 l d3,vehicle_299564-Audi-A8-L-Typ-4E-2008.html,3,4,"$312,433,331",4390,"$312,433,331",May 7,Paramount Pictures,2010
4,Iron Man 2,2010,2010 audi r8 spyder,vehicle_239415-Audi-R8-Spyder-Typ-42-2010.html,3,4,"$312,433,331",4390,"$312,433,331",May 7,Paramount Pictures,2010
...,...,...,...,...,...,...,...,...,...,...,...,...
1635,Daybreakers,2010,1997 mitsubishi diamante,vehicle_318489-Mitsubishi-Diamante-F38A-1997.html,1,100,"$30,101,577",2523,"$30,101,577",Jan 8,Lionsgate,2010
1636,Daybreakers,2010,1978 pontiac firebird trans am,vehicle_223185-Pontiac-Firebird-Trans-Am-1978....,3,100,"$30,101,577",2523,"$30,101,577",Jan 8,Lionsgate,2010
1637,Daybreakers,2010,2004 smart fortwo,vehicle_317791-smart-Fortwo-450-2004.html,1,100,"$30,101,577",2523,"$30,101,577",Jan 8,Lionsgate,2010
1638,Daybreakers,2010,1993 toyota 4runner,vehicle_236774-Toyota-4Runner-N130-1993.html,2,100,"$30,101,577",2523,"$30,101,577",Jan 8,Lionsgate,2010


In [38]:
# display data for cars made in 2010 with a strong screen presence
car_mov.loc[(car_mov.Car.str.contains('2010')) & (car_mov.Stars >= 3)]

Unnamed: 0,Release,Year_x,Car,url,Stars,Rank,Gross,Theaters,Total Gross,Release Date,Distributor,Year_y
4,Iron Man 2,2010,2010 audi r8 spyder,vehicle_239415-Audi-R8-Spyder-Typ-42-2010.html,3,4,"$312,433,331",4390,"$312,433,331",May 7,Paramount Pictures,2010
35,The Twilight Saga: Eclipse,2010,2010 volvo xc60,vehicle_359180-Volvo-XC60-2010.html,3,5,"$300,531,751",4468,"$300,531,751",Jun 30,Summit Entertainment,2010
362,Valentine's Day,2010,2010 ford transit connect,vehicle_316137-Ford-Transit-Connect-2010.html,3,22,"$110,485,654",3665,"$110,485,654",Feb 12,Warner Bros.,2010
373,Valentine's Day,2010,2010 toyota prius iii,vehicle_316128-Toyota-Prius-ZVW30-2010.html,3,22,"$110,485,654",3665,"$110,485,654",Feb 12,Warner Bros.,2010
415,Due Date,2010,2010 land-rover range rover series iii,vehicle_325355-Land-Rover-Range-Rover-L322-201...,3,26,"$98,935,039",3365,"$100,539,043",Nov 5,Warner Bros.,2010
424,Due Date,2010,2010 subaru impreza,vehicle_325351-Subaru-Impreza-GH-2010.html,4,26,"$98,935,039",3365,"$100,539,043",Nov 5,Warner Bros.,2010
693,Little Fockers,2010,2010 chevrolet malibu,vehicle_386490-Chevrolet-Malibu-2010.html,3,37,"$84,557,425",3675,"$148,438,600",Dec 22,Universal Pictures,2010
783,The A-Team,2010,2010 mercedes-benz e-klasse [w212],vehicle_316899-Mercedes-Benz-E-Klasse-W212-201...,3,42,"$77,222,099",3544,"$77,222,099",Jun 11,Twentieth Century Fox,2010
1056,Life as We Know It,2010,2010 triumph scrambler,vehicle_369804-Triumph-Scrambler-2010.html,3,64,"$52,727,343",3150,"$53,374,681",Oct 8,Warner Bros.,2010
1098,Wall Street: Money Never Sleeps,2010,2010 toyota prius iii,vehicle_368590-Toyota-Prius-ZVW30-2010.html,3,65,"$52,474,616",3597,"$52,474,616",Sep 24,Twentieth Century Fox,2010


In [39]:
# create a clean DataFrame that only keeps relevant columns
clean_car_mov = car_mov[['Release', 'Car', 'Stars', 'Rank', 'Gross', 'Year_y']]

In [40]:
# create a list of years, as strings, to search for cars made in the decade prior to our analysis range
yrs = np.arange(2000, 2011, 1).tolist()
years = [str(x) for x in yrs]
print(years)

['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010']


In [41]:
# display cars made in the years 2000 through 2010
all_years = "|".join(years)
clean_car_mov.loc[clean_car_mov.Car.str.contains(all_years) & (clean_car_mov.Stars >= 3)][["Car","Release","Rank","Gross"]].sort_values(by='Car')

Unnamed: 0,Car,Release,Rank,Gross
624,2000 cadillac deville stretched limousine,RED,34,"$89,296,297"
1422,2000 ford crown victoria,Vampires Suck,88,"$36,661,504"
163,2000 ford crown victoria,The Other Guys,18,"$119,219,978"
924,2000 porsche boxster [986],Why Did I Get Married Too?,55,"$60,095,852"
525,2001 dodge caravan,The Town,32,"$92,155,037"
...,...,...,...,...
424,2010 subaru impreza,Due Date,26,"$98,935,039"
1098,2010 toyota prius iii,Wall Street: Money Never Sleeps,65,"$52,474,616"
373,2010 toyota prius iii,Valentine's Day,22,"$110,485,654"
1056,2010 triumph scrambler,Life as We Know It,64,"$52,727,343"


In [42]:
# look at the cars made in 2000
clean_car_mov.loc[clean_car_mov.Car.str.contains('2000') & (clean_car_mov.Stars >= 3)]

Unnamed: 0,Release,Car,Stars,Rank,Gross,Year_y
163,The Other Guys,2000 ford crown victoria,3,18,"$119,219,978",2010
624,RED,2000 cadillac deville stretched limousine,3,34,"$89,296,297",2010
924,Why Did I Get Married Too?,2000 porsche boxster [986],3,55,"$60,095,852",2010
1422,Vampires Suck,2000 ford crown victoria,3,88,"$36,661,504",2010
