# Total Employment Trend in Franklin and Delaware Counties

## Introduction

(Blurb Here)

### Process Outline

(Steps here:)

## Setup

### Import required packages

In [72]:
import csv
import os
import urllib.request
import urllib
import pandas as pd
import json
from tableschema import Table

### Parameters

In [43]:
# Define input and output directories (relative to the notebook's working directory).
# INPUT_DIR holds the combined table and its schema file;
# OUTPUT_DIR is the directory the output table path is built under.
INPUT_DIR = "./input_data"
OUTPUT_DIR = "./output_data"

### Define inputs

#### (Input title)

In [78]:
# Combined table: file name and its location inside INPUT_DIR.
COMBINED_TABLE_FILENAME = "combined.csv"
COMBINED_TABLE_PATH = os.path.join(INPUT_DIR, COMBINED_TABLE_FILENAME)

# The schema file sits beside the data file and shares its base name
# ("combined.csv" -> "combined_schema.json").
COMBINED_TABLE_SCHEMA_FILENAME = COMBINED_TABLE_FILENAME.replace(".csv","_schema.json")
COMBINED_TABLE_SCHEMA_PATH = os.path.join(INPUT_DIR, COMBINED_TABLE_SCHEMA_FILENAME)

# Echo both resolved paths, one per line.
print(
    "Data: {}".format(COMBINED_TABLE_PATH),
    "Schema: {}".format(COMBINED_TABLE_SCHEMA_PATH),
    sep="\n",
)

Data: ./input_data\combined.csv
Schema: ./input_data\combined_schema.json


In [None]:
# File name and full path of the output table, built under OUTPUT_DIR.
OUTPUT_TABLE_FILENAME = "compiled.xlsx"
OUTPUT_TABLE_PATH = os.path.join(OUTPUT_DIR, OUTPUT_TABLE_FILENAME)
# Echo the resolved path so it is visible in the cell output.
print("Output data path: {}".format(OUTPUT_TABLE_PATH))

### Define outputs

#### (Output title)

#### Define inputs

## Getting input data

In [83]:
# *******************************************************************************
# qcewCreateDataRows : This function takes a raw csv payload (bytes or str) and
# splits it into a two-dimensional array containing the header row followed by
# the data rows of the csv file.
def qcewCreateDataRows(csv):
    """Split a raw CSV payload into a list of rows, each a list of cell strings.

    Parameters
    ----------
    csv : bytes or str
        Raw CSV text whose lines are separated by CRLF. A bytes payload is
        decoded with the default codec first; a str is used as-is.

    Returns
    -------
    list of list of str
        One inner list per CRLF-separated line, split on every comma. Quote
        characters are left in place, and a payload that ends with a line
        terminator yields a final [''] row.
    """
    # bytes objects expose .decode(); str objects do not, which surfaces as
    # AttributeError. (The previous `except er:` referenced an undefined name
    # and turned that fallback into a NameError.)
    try:
        text = csv.decode()
    except AttributeError:
        text = csv
    return [line.split(',') for line in text.split('\r\n')]
# *******************************************************************************



# *******************************************************************************
# qcewGetAreaData : This function takes a year, quarter, and area argument and
# returns an array containing the associated area data. Use 'a' for annual
# averages.
# For all area codes and titles see:
# http://www.bls.gov/cew/doc/titles/area/area_titles.htm
#
def qcewGetAreaData(year,qtr,area):
    """Download one area CSV from the BLS QCEW data API and split it into rows.

    Parameters
    ----------
    year : str or int
        Year segment of the request URL (converted with ``str``).
    qtr : str
        Quarter segment of the URL; lower-cased before use.
    area : str
        Area code segment of the URL; upper-cased before use.

    Returns
    -------
    list of list of str
        Whatever ``qcewCreateDataRows`` produces for the downloaded payload
        (header row first, then data rows).

    Raises
    ------
    urllib.error.URLError
        Propagated from ``urllib.request.urlopen`` when the request fails.
    """
    urlPath = "http://data.bls.gov/cew/data/api/[YEAR]/[QTR]/area/[AREA].csv"
    urlPath = urlPath.replace("[YEAR]", str(year))
    urlPath = urlPath.replace("[QTR]", qtr.lower())
    urlPath = urlPath.replace("[AREA]", area.upper())
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(urlPath) as httpStream:
        payload = httpStream.read()
    return qcewCreateDataRows(payload)


def fetch_and_combine_qcew_data(start_year, end_year, qtr, area):
    """Download one area's QCEW table for each year in a range and stack them.

    For every year from ``start_year`` through ``end_year`` (inclusive) the
    rows returned by ``qcewGetAreaData`` are turned into a DataFrame whose
    column names come from the first row, a ``year`` column is set to the
    requested year, and all double-quote characters are stripped from the
    values. The yearly frames are concatenated and only the rows with
    ``industry_code == "10"`` and ``own_code == "0"`` are returned.

    Parameters
    ----------
    start_year, end_year : int
        First and last year to request (both inclusive).
    qtr : str
        Quarter argument forwarded to ``qcewGetAreaData``.
    area : str
        Area code forwarded to ``qcewGetAreaData``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows of the concatenated yearly tables.
    """
    yearly_frames = []
    for yr in range(start_year, end_year + 1):
        print(f"Fetching data for {yr}...")
        rows = qcewGetAreaData(str(yr), qtr, area)

        # First row is the header; strip the quotes around each column name.
        header = [name.replace('"', '') for name in rows[0]]
        frame = pd.DataFrame(rows[1:], columns=header)

        # Tag every row with the requested year, then strip quotes from values.
        frame['year'] = yr
        yearly_frames.append(frame.replace('"', '', regex=True))

    stacked = pd.concat(yearly_frames, ignore_index=True)

    # Keep only rows where industry_code == "10" and own_code == "0".
    is_industry_10 = stacked['industry_code'] == "10"
    is_own_0 = stacked['own_code'] == "0"
    return stacked.loc[is_industry_10 & is_own_0]

# Fetch annual-average ("a") data for 2014-2021 for each county
# (area codes 39049 -> Franklin, 39041 -> Delaware, per the variable names).
combined_franklin_data = fetch_and_combine_qcew_data(2014, 2021, "a", "39049")
combined_delaware_data = fetch_and_combine_qcew_data(2014, 2021, "a", "39041")

# Stack both counties into a single DataFrame
combined_df = pd.concat([combined_franklin_data, combined_delaware_data], ignore_index=True)

# to_csv does not create missing parent directories, so make sure the target
# directory exists before writing (otherwise a fresh checkout fails here).
os.makedirs(os.path.dirname(COMBINED_TABLE_PATH) or ".", exist_ok=True)
combined_df.to_csv(COMBINED_TABLE_PATH, index=False)

print("The CSV files have been combined and saved.")
print(f"Processed data saved to {COMBINED_TABLE_PATH}")


# Open the combined CSV that was just written as a tableschema Table
table = Table(COMBINED_TABLE_PATH)

# Infer a schema (field names and types) from the table contents
table.infer()

# Register 'N/A' and the empty string under the schema's 'missingValues' key.
# This only edits the schema descriptor; no cell values in the CSV are rewritten.
table.schema.descriptor['missingValues'] = ['N/A', '']
# Commit so the descriptor change is applied to the schema object before saving
table.schema.commit()

# Save the schema as JSON next to the data file
table.schema.save(COMBINED_TABLE_SCHEMA_PATH)



# Load the saved schema back from the JSON file as a plain dict
with open(COMBINED_TABLE_SCHEMA_PATH, 'r') as file:
    schema = json.load(file)

# Define a function to convert data types based on the schema
def convert_data_types(row, schema):
    """Cast the values of one CSV row according to a Table Schema descriptor.

    Parameters
    ----------
    row : dict
        Mapping of column name to raw value (e.g. a ``csv.DictReader`` row).
    schema : dict
        Table Schema descriptor with a ``'fields'`` list of
        ``{'name': ..., 'type': ...}`` entries and, optionally, a
        ``'missingValues'`` list (defaults to ``['']``).

    Returns
    -------
    dict
        A new dict containing only the schema fields present in ``row``:
        ``'integer'`` (or the legacy alias ``'int'``) values become ``int``,
        ``'number'`` values become ``float``, ``'string'`` values become
        ``str``, and any other type is passed through unchanged. A numeric
        field whose raw value is listed in ``missingValues`` becomes ``None``.

    Raises
    ------
    ValueError
        If a non-missing numeric value cannot be parsed.
    """
    # Table Schema names its numeric types 'integer' and 'number'; the old
    # check for 'int' never matched an inferred schema, so nothing was cast.
    numeric_casters = {'integer': int, 'int': int, 'number': float}
    missing_values = schema.get('missingValues', [''])

    converted_row = {}
    for field in schema['fields']:
        field_name = field['name']
        if field_name not in row:
            continue
        raw_value = row[field_name]
        # 'type' is optional in a field descriptor; treat its absence as string.
        field_type = field.get('type', 'string')

        caster = numeric_casters.get(field_type)
        if caster is not None:
            # int('') / float('N/A') would raise, so map missing markers to None.
            converted_row[field_name] = None if raw_value in missing_values else caster(raw_value)
        elif field_type == 'string':
            converted_row[field_name] = str(raw_value)
        else:
            # Add more type conversions as needed; unrecognized types are kept as is.
            converted_row[field_name] = raw_value
    return converted_row

# Read the combined CSV row by row and cast each row according to the schema
converted_data = []

# newline='' is what the csv module asks for when it is handed a file object,
# and utf-8 matches the default encoding pandas used when writing the file.
with open(COMBINED_TABLE_PATH, 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        converted_row = convert_data_types(row, schema)
        converted_data.append(converted_row)

# Reload the combined table so pandas infers column dtypes from the CSV
df = pd.read_csv(COMBINED_TABLE_PATH)

# Aggregate rows by "year" and sum up all other numeric columns.
# numeric_only=True keeps text columns out of the sum instead of concatenating
# them or raising, depending on the pandas version.
# NOTE(review): code-like columns that parse as numbers are still summed and
# the results are not meaningful for them - confirm which columns are needed.
aggregated_df = df.groupby('year').sum(numeric_only=True).reset_index()

# NOTE(review): this overwrites the combined per-county file, so the schema
# saved earlier no longer describes what is at COMBINED_TABLE_PATH.
aggregated_df.to_csv(COMBINED_TABLE_PATH, index=False)

print("The data has been aggregated by year and saved.")

Fetching data for 2014...
Fetching data for 2015...
Fetching data for 2016...
Fetching data for 2017...
Fetching data for 2018...
Fetching data for 2019...
Fetching data for 2020...
Fetching data for 2021...
Fetching data for 2014...
Fetching data for 2015...
Fetching data for 2016...
Fetching data for 2017...
Fetching data for 2018...
Fetching data for 2019...
Fetching data for 2020...
Fetching data for 2021...
The CSV files have been combined and saved.
Processed data saved to ./input_data\combined.csv
The data has been aggregated by year and saved.
