# Combining .csv files
-----------

### Two files are being joined into one. These files are:
- **movie_reviews.csv**: Contains movie data from 1990 - 2024
- **movie_reviews_2.csv**: Contains movie data from 1970 - 1989

In [24]:
import os
import pandas as pd

# Folder that holds the movie review CSV files.
# Set the HAPPY_FLIX_RESOURCES environment variable to point at your own copy
# of the folder; when it is unset, the original path on Taylor's computer is
# used as the fallback, so the notebook behaves exactly as before there.
file_path = os.environ.get(
    "HAPPY_FLIX_RESOURCES",
    "C:/Users/tpete/happy-flix/Resources/",
)

# List every entry in that folder to confirm the expected CSV files are present.
file_list = os.listdir(file_path)
file_list

['.ipynb_checkpoints',
 'combined_movies.csv',
 'Combining_movie_reviews.ipynb',
 'movie_reviews.csv',
 'movie_reviews_2.csv']

In [3]:
# Alternative way to find/list files from your directory 
# (if they are in the same directory as this .ipynb file)
import os
import pandas as pd
# Resolve the current working directory to an absolute path, then gather the
# names of every entry inside it and show them as the cell output.
cwd = os.path.abspath(os.curdir)
file_list = os.listdir(cwd)
file_list

['.ipynb_checkpoints',
 'Combining_movie_reviews.ipynb',
 'movie_reviews.csv',
 'movie_reviews_2.csv']

In [7]:
# Another alternative, where the user can obtain only .csv files in the cwd
import os
import glob
import pandas as pd
# Collect the CSV files in the current working directory that will be combined.
#   * 'combined_movies.csv' is the file this notebook exports. The recorded
#     directory listing shows it next to the input files, so it is skipped
#     here; otherwise a re-run would read the previous output back in and
#     duplicate every row.
#   * glob returns matches in arbitrary, filesystem-dependent order. Sorting
#     makes the load order (and therefore the row order of the combined
#     DataFrame) reproducible.
csv_files = sorted(
    name for name in glob.glob('*.csv') if name != 'combined_movies.csv'
)
csv_files

['movie_reviews.csv', 'movie_reviews_2.csv']

In [8]:
# Combine every file named in csv_files into a single DataFrame.
# Each file is read once and all frames are concatenated in one call; calling
# pd.concat inside the loop re-copies the accumulated rows on every iteration.
movie_frames = [pd.read_csv(file) for file in csv_files]

# pd.concat raises ValueError when given an empty list, so fall back to an
# empty DataFrame if no CSV files were found.
# ignore_index=True discards each file's own row labels and numbers the
# combined rows 0..n-1.
all_movies_df = (
    pd.concat(movie_frames, ignore_index=True)
    if movie_frames
    else pd.DataFrame()
)

# NOTE(review): the recorded head/tail outputs show release_date as
# YYYY-MM-DD in one file and M/D/YYYY in the other; the values are kept as-is
# here and would need normalizing in a later cleaning step.

# Now, all_movies_df contains the appended data from all CSV files
print(all_movies_df)

      adult                     backdrop_path                   genre_ids  \
0     False  /xOMo8BRK7PfcJv9JCnx7s5hj0PX.jpg                   [878, 12]   
1     False  /1XDDXPXGiI8id7MrUxK36ke7gkX.jpg     [16, 28, 12, 35, 10751]   
2     False  /j3Z3XktmWB1VhsS8iXNcrR86PXi.jpg           [28, 878, 12, 14]   
3     False  /oe7mWkvYhK4PLRNAVSvonzyUXNy.jpg                    [28, 53]   
4     False  /pwGmXVKUgKN13psUjlhC9zBcq1o.jpg                    [28, 14]   
...     ...                               ...                         ...   
6118  False                               NaN                        [35]   
6119  False  /2DXs3QyFjmAPcsaUBLVkcpVrGAG.jpg                    [53, 28]   
6120  False  /oRAYt09CCyBkYFe9tgZw2voUoMt.jpg                    [27, 53]   
6121  False                               NaN                        [28]   
6122  False  /k85sXP5xX9JGdhFSozKeSQ3elT8.jpg  [878, 18, 9648, 10770, 53]   

           id original_language                   original_title  \
0      

In [9]:
# A peek at the first five rows of the combined DataFrame. These rows come
# from whichever file was loaded first (the most recent movies in the
# recorded run).
all_movies_df.head(n=5)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,reviews
0,False,/xOMo8BRK7PfcJv9JCnx7s5hj0PX.jpg,"[878, 12]",693134,en,Dune: Part Two,Follow the mythic journey of Paul Atreides as ...,4534.956,/1pdfLvkbY9ohJlCjQH2CZjjYVvJ.jpg,2024-02-27,Dune: Part Two,False,8.311,2707,FULL SPOILER-FREE REVIEW @ https://talkingfilm...
1,False,/1XDDXPXGiI8id7MrUxK36ke7gkX.jpg,"[16, 28, 12, 35, 10751]",1011985,en,Kung Fu Panda 4,Po is gearing up to become the spiritual leade...,3214.314,/f7QBvIzoWSJw3jWPGnZBc5vwQ0l.jpg,2024-03-02,Kung Fu Panda 4,False,7.066,897,_Kung Fu Panda 4_ isn’t the best _Kung Fu Pand...
2,False,/j3Z3XktmWB1VhsS8iXNcrR86PXi.jpg,"[28, 878, 12, 14]",823464,en,Godzilla x Kong: The New Empire,"Following their explosive showdown, Godzilla a...",2297.34,/tMefBSflR6PGQLv7WvFPpKLZkyk.jpg,2024-03-27,Godzilla x Kong: The New Empire,False,6.7,575,FULL SPOILER-FREE REVIEW @ https://fandomwire....
3,False,/oe7mWkvYhK4PLRNAVSvonzyUXNy.jpg,"[28, 53]",359410,en,Road House,Ex-UFC fighter Dalton takes a job as a bouncer...,1134.726,/bXi6IQiQDHD00JFio5ZSZOeRSBh.jpg,2024-03-08,Road House,False,7.066,1379,Very poor scenario and the story just does not...
4,False,/pwGmXVKUgKN13psUjlhC9zBcq1o.jpg,"[28, 14]",634492,en,Madame Web,"Forced to confront revelations about her past,...",921.14,/rULWuutDcN5NvtiZi4FRPzRYWSh.jpg,2024-02-14,Madame Web,False,5.676,1046,We start off with an heavily pregnant woman de...


In [21]:
# A peek at the last five rows of the combined DataFrame. These rows come
# from whichever file was loaded last (the older, 1970 - 1989 movie data in
# the recorded run).
all_movies_df.tail(n=5)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,reviews
6118,False,,[35],94073,en,Southern Comforts,A beauty contest is staged in the barn of a lo...,1.451,/1KXGjP303tzMhVubSSYhjPYOq4U.jpg,7/9/1971,Southern Comforts,False,3.8,5,The early 70's saw a genre of film that involv...
6119,False,/2DXs3QyFjmAPcsaUBLVkcpVrGAG.jpg,"[53, 28]",85600,en,Blood Debts,It started with the rape of his daughter then ...,1.451,/7s6xCLnrZ8z6ge1PzAZPVncLzW9.jpg,1/30/1985,Blood Debts,False,3.8,12,Blood Debts is a macho man ripoff of the movie...
6120,False,/oRAYt09CCyBkYFe9tgZw2voUoMt.jpg,"[27, 53]",30928,en,Demon of Paradise,Hunters become the hunted when illegal dynamit...,1.409,/eNx1hqwbmhj2Cn6z87bXXNrOaMr.jpg,4/28/1987,Demon of Paradise,False,4.0,14,"***Lizard-man on the loose in the Philippines,..."
6121,False,,[28],81470,en,The Guy From Harlem,Tough streetwise private investigator Al Conno...,1.403,/7gQb473IENjz2M2AK9ZElqTy2Q3.jpg,9/1/1977,The Guy From Harlem,False,3.0,17,This film is so bad that it is hard to watch. ...
6122,False,/k85sXP5xX9JGdhFSozKeSQ3elT8.jpg,"[878, 18, 9648, 10770, 53]",315379,en,Paper Man,A prank that starts with a group of college st...,1.402,/dSsZCYBhyKjasUBkBWgBnhbaqip.jpg,11/12/1971,Paper Man,False,6.3,10,_**Killing Machine**_ \r\n\r\nAfter a credit...


In [25]:
# Summarize the combined DataFrame: column names, non-null counts and dtypes.
# Any column whose non-null count is lower than the number of entries has
# missing values; we have a few of those, and they can be handled in a
# separate file.
all_movies_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6123 entries, 0 to 6122
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   adult              6123 non-null   bool   
 1   backdrop_path      6064 non-null   object 
 2   genre_ids          6123 non-null   object 
 3   id                 6123 non-null   int64  
 4   original_language  6123 non-null   object 
 5   original_title     6123 non-null   object 
 6   overview           6123 non-null   object 
 7   popularity         6123 non-null   float64
 8   poster_path        6119 non-null   object 
 9   release_date       6123 non-null   object 
 10  title              6123 non-null   object 
 11  video              6123 non-null   bool   
 12  vote_average       6123 non-null   float64
 13  vote_count         6123 non-null   int64  
 14  reviews            6123 non-null   object 
dtypes: bool(2), float64(2), int64(2), object(9)
memory usage: 634.0+ KB


In [23]:
# Export the combined DataFrame to combined_movies.csv in the Resources folder.
# Set the HAPPY_FLIX_RESOURCES environment variable to write to your own copy
# of the folder; when it is unset, the original path on Taylor's computer is
# used, so the default output location is unchanged.
output_dir = os.environ.get(
    "HAPPY_FLIX_RESOURCES",
    "C:/Users/tpete/happy-flix/Resources/",
)

# NOTE(review): to_csv is called with its default index setting, so the row
# index is written as an unnamed first column. Left unchanged in case other
# files that read combined_movies.csv expect that column; confirm before
# switching to index=False.
all_movies_df.to_csv(os.path.join(output_dir, 'combined_movies.csv'))