In [1]:
# Select matplotlib's "notebook" backend for the figures drawn in this notebook.
# Keep this comment on its own line: text after a line magic is passed to it as an argument.
%matplotlib notebook

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

In [3]:
# Population CSV exports to normalise, one file per metro area.
csv_files = [
    "../Resources/Houston-population-2023-07-27.csv",
    "../Resources/Dallas-Fort Worth-population-2023-07-27.csv",
    "../Resources/San Antonio-population-2023-07-28.csv",
    "../Resources/Austin-population-2023-07-27.csv",
    "../Resources/New York City-population-2023-07-27.csv"
]

# Canonical headers, applied positionally to each file's leading columns.
new_column_names = ["Date", "Population", "Annual Change Date"]

# Inclusive range of calendar years to keep.
start_year = 2008
end_year = 2023

for file_path in csv_files:
    # Load the CSV file into a DataFrame
    raw_df = pd.read_csv(file_path)

    # Pair the existing headers with the canonical ones by position. zip() stops
    # at the shorter sequence, so any columns beyond the third keep their names.
    old_column_names = raw_df.columns.tolist()
    column_mapping = dict(zip(old_column_names, new_column_names))
    renamed_df = raw_df.rename(columns=column_mapping)

    # Every later step needs 'Date'. Fail here, naming the file, instead of
    # hitting an anonymous KeyError further down.
    if 'Date' not in renamed_df.columns:
        raise KeyError(
            f"{file_path}: no 'Date' column after renaming "
            f"(columns found: {list(renamed_df.columns)})"
        )

    # Parse dates, keep the requested years, order chronologically. Each step
    # returns a new frame, so nothing is sorted in place on a filtered slice
    # (the pattern that triggers SettingWithCopyWarning).
    df = (
        renamed_df
        .assign(Date=pd.to_datetime(renamed_df['Date']))
        .loc[lambda frame: frame['Date'].dt.year.between(start_year, end_year)]
        .sort_values(by='Date')
    )

    # Save the updated DataFrame back to the CSV file.
    # NOTE: this overwrites the source file, so rows outside the year range are
    # dropped from disk; the cells below read these same paths.
    df.to_csv(file_path, index=False)

print("Columns in all CSV files have been renamed to match.")

Columns in all CSV files have been renamed to match.


In [4]:
# Houston population table, read from the file rewritten by the normalisation loop.
csv_path = Path("../Resources/Houston-population-2023-07-27.csv")
# Read the file once; 'Date' is left exactly as read_csv returns it (not parsed).
pop_hou = pd.read_csv(csv_path)
pop_hou.head()


Unnamed: 0,Date,Population,Annual Change Date
0,2008-12-31,4727000,2.63
1,2009-12-31,4850000,2.6
2,2010-12-31,4976000,2.6
3,2011-12-31,5106000,2.61
4,2012-12-31,5239000,2.6


In [5]:
# Dallas-Fort Worth population table, read from the file rewritten by the normalisation loop.
csv_path = Path("../Resources/Dallas-Fort Worth-population-2023-07-27.csv")
# Read the file once; 'Date' is left exactly as read_csv returns it (not parsed).
pop_dfw = pd.read_csv(csv_path)
pop_dfw.head()

Unnamed: 0,Date,Population,Annual Change Date
0,2008-12-31,4936000,2.15
1,2009-12-31,5041000,2.13
2,2010-12-31,5149000,2.14
3,2011-12-31,5259000,2.14
4,2012-12-31,5372000,2.15


In [6]:
# San Antonio population table, read from the file rewritten by the normalisation loop.
csv_path = Path("../Resources/San Antonio-population-2023-07-28.csv")
# Read the file once; 'Date' is left exactly as read_csv returns it (not parsed).
pop_sa = pd.read_csv(csv_path)
pop_sa.head()

Unnamed: 0,Date,Population,Annual Change Date
0,2008-12-31,1674000,2.89
1,2009-12-31,1722000,2.87
2,2010-12-31,1771000,2.85
3,2011-12-31,1821000,2.82
4,2012-12-31,1873000,2.86


In [16]:
# Austin population table, read from the file rewritten by the normalisation loop.
csv_path = Path("../Resources/Austin-population-2023-07-27.csv")
# Read the file once; 'Date' is left exactly as read_csv returns it (not parsed).
pop_aust = pd.read_csv(csv_path)
# The whole frame is displayed here, unlike the .head() previews used for the Texas metros above.
pop_aust

Unnamed: 0,Date,Population,Annual Change Date
0,2008-12-31,1268000,4.28
1,2009-12-31,1321000,4.18
2,2010-12-31,1377000,4.24
3,2011-12-31,1434000,4.14
4,2012-12-31,1495000,4.25
5,2013-12-31,1558000,4.21
6,2014-12-31,1623000,4.17
7,2015-12-31,1692000,4.25
8,2016-12-31,1763000,4.2
9,2017-12-31,1838000,4.25


In [17]:
# New York City population table, read from the file rewritten by the normalisation loop.
csv_path = Path("../Resources/New York City-population-2023-07-27.csv")
# Read the file once; 'Date' is left exactly as read_csv returns it (not parsed).
pop_ny = pd.read_csv(csv_path)
# The whole frame is displayed here rather than a .head() preview.
pop_ny

Unnamed: 0,Date,Population,Annual Change Date
0,2008-12-31,18254000,0.31
1,2009-12-31,18309000,0.3
2,2010-12-31,18365000,0.31
3,2011-12-31,18421000,0.3
4,2012-12-31,18478000,0.31
5,2013-12-31,18534000,0.3
6,2014-12-31,18591000,0.31
7,2015-12-31,18648000,0.31
8,2016-12-31,18705000,0.31
9,2017-12-31,18762000,0.3


In [9]:
# Overlay every metro area's population series on one shared figure.
city_series = [
    (pop_hou, 'Houston'),
    (pop_dfw, 'Dallas FW'),
    (pop_sa, 'San Antonio'),
    (pop_aust, 'Austin'),
    (pop_ny, 'NY City'),
]

plt.figure(figsize=(9, 5))
for city_frame, city_label in city_series:
    # One line per city; the label feeds the legend below.
    plt.plot(city_frame['Date'], city_frame['Population'], label=city_label)

# Axis text, title and grid
plt.xlabel('Date')
plt.ylabel('Population')
plt.title('Combined Line Graph of Population')
plt.grid(True)

# Tilt the date tick labels, then fit the layout and add the legend
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend()

# Render the figure
plt.show()

<IPython.core.display.Javascript object>

In [10]:
def plot_data_over_time(x_data, y_data, x_label, y_label, title):
    """Draw one series as a blue line with circle markers on a new 9x5 figure.

    Parameters
    ----------
    x_data, y_data
        Values for the horizontal and vertical axes, passed straight to the
        plot call.
    x_label, y_label
        Text for the x-axis and y-axis labels.
    title
        Text for the figure title.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``; nothing is returned.
    """
    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_subplot(1, 1, 1)

    ax.plot(x_data, y_data, marker='o', linestyle='-', color='b')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.grid(True)

    # `ax` is the current axes, so this rotates its x tick labels.
    plt.xticks(rotation=45)
    fig.tight_layout()
    plt.show()

In [11]:
# Houston: population against date.
x_data, y_data = pop_hou['Date'], pop_hou['Population']
x_label, y_label = 'Date', 'Population'
title = 'Houston Population Over Time'

plot_data_over_time(
    x_data=x_data,
    y_data=y_data,
    x_label=x_label,
    y_label=y_label,
    title=title,
)

<IPython.core.display.Javascript object>

In [12]:
# Dallas-Fort Worth: population against date.
x_data, y_data = pop_dfw['Date'], pop_dfw['Population']
x_label, y_label = 'Date', 'Population'
title = 'DFW Population Over Time'

plot_data_over_time(
    x_data=x_data,
    y_data=y_data,
    x_label=x_label,
    y_label=y_label,
    title=title,
)

<IPython.core.display.Javascript object>

In [15]:
# Austin: population against date.
x_data, y_data = pop_aust['Date'], pop_aust['Population']
x_label, y_label = 'Date', 'Population'
title = 'Austin Population Over Time'

plot_data_over_time(
    x_data=x_data,
    y_data=y_data,
    x_label=x_label,
    y_label=y_label,
    title=title,
)

<IPython.core.display.Javascript object>

In [14]:
# San Antonio: population against date.
x_data, y_data = pop_sa['Date'], pop_sa['Population']
x_label, y_label = 'Date', 'Population'
title = 'San Antonio Population Over Time'

plot_data_over_time(
    x_data=x_data,
    y_data=y_data,
    x_label=x_label,
    y_label=y_label,
    title=title,
)

<IPython.core.display.Javascript object>

In [13]:
# New York City: population against date.
x_data, y_data = pop_ny['Date'], pop_ny['Population']
x_label, y_label = 'Date', 'Population'
title = 'NY City Population Over Time'

plot_data_over_time(
    x_data=x_data,
    y_data=y_data,
    x_label=x_label,
    y_label=y_label,
    title=title,
)

<IPython.core.display.Javascript object>