https://catalog.data.gov/dataset/public-company-bankruptcy-cases-opened-and-monitored

Data updated June 18, 2019


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy import create_engine

# Extract

In [2]:
# Load CSV files

data09 = "bankruptcy_data/public_company_bankruptcy_cases_2009.csv"
data10 = "bankruptcy_data/public_company_bankruptcy_cases_2010.csv"
data11 = "bankruptcy_data/public_company_bankruptcy_cases_2011.csv"

In [3]:
# Read all data files and store into Pandas DataFrame

data09_df = pd.read_csv(data09)
data10_df = pd.read_csv(data10)
data11_df = pd.read_csv(data11)


# Transform 

In [4]:
# Add fiscal year to each data frame

data09_df['FISCAL_YEAR'] = 2009
data10_df['FISCAL_YEAR'] = 2010
data11_df['FISCAL_YEAR'] = 2011

In [5]:
# Drop NAN values

data09_df = data09_df.dropna()
data10_df = data10_df.dropna()
data11_df = data11_df.dropna()


In [6]:
# Rearrenge columns of all data and reaname columns

data09_df = data09_df[['FISCAL_YEAR','STATE', 'COMPANY NAME', 'ASSETS (MILLIONS)','LIABILITIES (MILLIONS)', 'DISTRICT']] 
data09_df = data09_df.rename(columns= {'COMPANY NAME': 'COMPANY', 'ASSETS (MILLIONS)': 'ASSETS_MILLIONS', 'LIABILITIES (MILLIONS)':'LIABILITIES_MILLIONS', 'DISTRICT': 'COURT_DISTRICT'})

data10_df = data10_df[['FISCAL_YEAR','STATE', 'COMPANY NAME', 'ASSETS (MILLIONS)','LIABILITIES (MILLIONS)', 'DISTRICT']] 
data10_df = data10_df.rename(columns= {'COMPANY NAME': 'COMPANY', 'ASSETS (MILLIONS)': 'ASSETS_MILLIONS', 'LIABILITIES (MILLIONS)':'LIABILITIES_MILLIONS', 'DISTRICT': 'COURT_DISTRICT'})

data11_df = data11_df[['FISCAL_YEAR','STATE', 'COMPANY NAME', 'ASSETS (MILLIONS)','LIABILITIES (MILLIONS)', 'DISTRICT']] 
data11_df = data11_df.rename(columns= {'COMPANY NAME': 'COMPANY', 'ASSETS (MILLIONS)': 'ASSETS_MILLIONS', 'LIABILITIES (MILLIONS)':'LIABILITIES_MILLIONS', 'DISTRICT': 'COURT_DISTRICT'})

In [7]:
data09_df.info()
data10_df.info()
data11_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 127 entries, 0 to 128
Data columns (total 6 columns):
FISCAL_YEAR             127 non-null int64
STATE                   127 non-null object
COMPANY                 127 non-null object
ASSETS_MILLIONS         127 non-null float64
LIABILITIES_MILLIONS    127 non-null float64
COURT_DISTRICT          127 non-null object
dtypes: float64(2), int64(1), object(3)
memory usage: 6.9+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 63 entries, 1 to 63
Data columns (total 6 columns):
FISCAL_YEAR             63 non-null int64
STATE                   63 non-null object
COMPANY                 63 non-null object
ASSETS_MILLIONS         63 non-null object
LIABILITIES_MILLIONS    63 non-null object
COURT_DISTRICT          63 non-null object
dtypes: int64(1), object(5)
memory usage: 3.4+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 62 entries, 0 to 61
Data columns (total 6 columns):
FISCAL_YEAR             62 non-null int64
STATE           

In [8]:
data10_df.dtypes

# Kore Holdings, Inc.
# Bridgetech Holdings International, Inc.
# Java Detour, Inc.
# Law Enforcement Associates Corporation
# Sand Spring Capital


FISCAL_YEAR              int64
STATE                   object
COMPANY                 object
ASSETS_MILLIONS         object
LIABILITIES_MILLIONS    object
COURT_DISTRICT          object
dtype: object

In [9]:
data10_df[data10_df['COMPANY'] == 'Spongetech Delivery Systems, Inc.']

Unnamed: 0,FISCAL_YEAR,STATE,COMPANY,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT
51,2010,NY,"Spongetech Delivery Systems, Inc.",0.5,----,SD


In [10]:
data11_df[data11_df['COMPANY'] == 'Bridgetech Holdings International, Inc.']

Unnamed: 0,FISCAL_YEAR,STATE,COMPANY,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT
10,2011,CA,"Bridgetech Holdings International, Inc.",--,7,SD


In [11]:
data10_df = data10_df.set_index('COMPANY')
data10_df = data10_df.drop('Spongetech Delivery Systems, Inc.', axis = 0)
data10_df = data10_df.drop('Kore Holdings, Inc.', axis = 0)
data10_df = data10_df.drop('U.S. Dry Cleaning Services Corporation', axis = 0)

data10_df= data10_df.reset_index()

In [12]:
data11_df = data11_df.set_index('COMPANY')
data11_df = data11_df.drop('Bridgetech Holdings International, Inc.', axis = 0)
data11_df = data11_df.drop('Java Detour, Inc.', axis = 0)
data11_df = data11_df.drop('Law Enforcement Associates Corporation', axis = 0)
data11_df = data11_df.drop('Sand Spring Capital', axis = 0)

data11_df= data11_df.reset_index()

In [13]:
data10_df[data10_df['COMPANY'] == 'Spongetech Delivery Systems, Inc.']

Unnamed: 0,COMPANY,FISCAL_YEAR,STATE,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT


In [14]:
data11_df[data11_df['COMPANY'] == 'Bridgetech Holdings International, Inc.']

Unnamed: 0,COMPANY,FISCAL_YEAR,STATE,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT


In [15]:
data10_df['ASSETS_MILLIONS'] = data10_df['ASSETS_MILLIONS'].str.replace(",","",regex = True).astype(float)
data10_df['LIABILITIES_MILLIONS'] = data10_df['LIABILITIES_MILLIONS'].str.replace(",","",regex = True).astype(float)


In [16]:
data10_df.dtypes
data10_df.head(10000)

Unnamed: 0,COMPANY,FISCAL_YEAR,STATE,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT
0,Advanta Group,2010,DE,363.0,331.0,D
1,"Amcore Financial, Inc.",2010,IL,7.0,75.0,ND
2,American Mortgage Acceptance Company,2010,NY,6.37,119.968,SD
3,"Barzel Industries, Inc.",2010,DE,366.0,385.0,D
4,Baseline Oil & Gas Corp.,2010,TX,80.0,139.0,SD
5,Blockbuster,2010,NY,1017.04,1464.94,SD
6,"BSML, Inc.",2010,FL,6.94,9.97,SD
7,"California Coastal Communities, Inc.",2010,CA,291.0,231.0,CD
8,"Canopy Financial, Inc.",2010,IL,18.99,25.84,ND
9,Capital Growth Systems Inc.,2010,DE,26.97,17.146,D


In [17]:
data11_df['ASSETS_MILLIONS'] = data11_df['ASSETS_MILLIONS'].str.replace(",","",regex = True).astype(float)
data11_df['LIABILITIES_MILLIONS'] = data11_df['LIABILITIES_MILLIONS'].str.replace(",","",regex = True).astype(float)

In [18]:
# Append data for all three years
appen1 =  data09_df.append(data10_df, ignore_index = True, sort = "false")
appen2 =  appen1.append(data11_df, ignore_index = True, sort = "false")

# Rearrenge columns of all data and reaname columns

appen2 = appen2[['FISCAL_YEAR','STATE', 'COMPANY', 'ASSETS_MILLIONS','LIABILITIES_MILLIONS', 'COURT_DISTRICT']] 

appen2.dtypes


FISCAL_YEAR               int64
STATE                    object
COMPANY                  object
ASSETS_MILLIONS         float64
LIABILITIES_MILLIONS    float64
COURT_DISTRICT           object
dtype: object

In [19]:
data09_clean = data09_df
data10_clean = data10_df
data11_clean = data11_df

combined_data = appen2

data09_clean.to_csv("bankruptcy_data/data09_clean.csv", index=False, encoding='utf8')
data10_clean.to_csv("bankruptcy_data/data10_clean.csv", index=False, encoding='utf8')
data11_clean.to_csv("bankruptcy_data/data11_clean.csv", index=False, encoding='utf8')
combined_data.to_csv("bankruptcy_data/combined_data.csv", index=False, encoding='utf8')

data09_clean.to_json("bankruptcy_data/data09_clean.json", orient='columns')
data10_clean.to_json("bankruptcy_data/data10_clean.json", orient='columns')
data11_clean.to_json("bankruptcy_data/data11_clean.json", orient='columns')
combined_data.to_json("bankruptcy_data/combined_data.json", orient='columns')

# Load

In [20]:
from sqlalchemy import create_engine
import secret

In [21]:
# connect to local database 
engine = create_engine(f"postgresql://{secret.user_pass}@localhost:5432/bankruptcyDB")
# connection = engine.connect()

In [22]:
engine.table_names()

['data2009', 'data2010', 'data2011', 'combined_data']

In [24]:
#  Use pandas to load csv converted DataFrame into database
data09_clean.to_sql(name='data2009', con=engine, if_exists='append', index=False)
data10_clean.to_sql(name='data2010', con=engine, if_exists='append', index=False)
data11_clean.to_sql(name='data2011', con=engine, if_exists='append', index=False)
combined_data.to_sql(name='combined_data', con=engine, if_exists='append', index=False)

In [25]:
# Confirm data has been added by querying the tables

pd.read_sql_query('select * from combined_data', con=engine).head()


Unnamed: 0,FISCAL_YEAR,STATE,COMPANY,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT
0,2009,FL,"A21, Inc.",25.2,30.3,MD
1,2009,DE,Abitibibowater Inc.,9937.0,2213.0,D
2,2009,FL,"Accentia Biopharmaceuticals, Inc.",134.9,77.6,MD
3,2009,DE,Accredited Home Lenders Holding Co.,799.5,490.7,D
4,2009,NY,Apex Silver Mines LTD,721.3,930.9,SD


In [26]:
pd.read_sql_query('select * from data2009', con=engine).head()

Unnamed: 0,FISCAL_YEAR,STATE,COMPANY,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT
0,2009,FL,"A21, Inc.",25.2,30.3,MD
1,2009,DE,Abitibibowater Inc.,9937.0,2213.0,D
2,2009,FL,"Accentia Biopharmaceuticals, Inc.",134.9,77.6,MD
3,2009,DE,Accredited Home Lenders Holding Co.,799.5,490.7,D
4,2009,NY,Apex Silver Mines LTD,721.3,930.9,SD


In [27]:
pd.read_sql_query('select * from data2010', con=engine).head()

Unnamed: 0,COMPANY,FISCAL_YEAR,STATE,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT
0,Advanta Group,2010,DE,363.0,331.0,D
1,"Amcore Financial, Inc.",2010,IL,7.0,75.0,ND
2,American Mortgage Acceptance Company,2010,NY,6.37,119.968,SD
3,"Barzel Industries, Inc.",2010,DE,366.0,385.0,D
4,Baseline Oil & Gas Corp.,2010,TX,80.0,139.0,SD


In [28]:
pd.read_sql_query('select * from data2011', con=engine).head()

Unnamed: 0,COMPANY,FISCAL_YEAR,STATE,ASSETS_MILLIONS,LIABILITIES_MILLIONS,COURT_DISTRICT
0,"Ad Systems Communications, Inc.",2011,NV,405.0,4.0,D
1,Alphatrade.com,2011,NV,686.0,4.0,D
2,AMBAC Financial Group,2011,NY,395.0,1683.0,SD
3,"Ambassdors International , Inc.",2011,DE,86.44,87.32,D
4,American Pacific Financial Corporation,2011,NV,19.18,161.08,D


In [31]:
# Create a table view of all bankruptcy data from 2009 to 2011

pd.read_sql_query('create or replace view bankruptcy_data as select * from combined_data', con = engine)

ResourceClosedError: This result object does not return rows. It has been closed automatically.

In [None]:
# from pymongo import MongoClient
# import sys
# import json

In [None]:
# # https://realpython.com/introduction-to-mongodb-and-python/
# # Establish a connection

# client = MongoClient('mongodb://localhost:27017') ## Specify the port number

# # Create or access a database. By default if the database is not found, it will created

# db = client.bankruptcyDB


In [None]:
# # Transform all dataframe to dictionaries

# data09_clean_dict = data09_clean.to_dict('series')
# data10_clean_dict = data10_clean.to_dict('series')
# data11_clean_dict = data11_clean.to_dict('series')


# print(data10_clean_dict)


In [None]:
# combined_data_dict = combined_data.to_dict('series')

In [None]:
# combined_data_dict

In [None]:
# # Create a collection in db (aka Table)

# data09_collection = db.data2009.find()
# data10_collection = db.data2010.find()
# data11_collection = db.data2011.find()

In [None]:
# result10 = data10_collection.insert_one(data10_clean_dict)


In [None]:
# def import_content(bankruptcy_data/combined_data.csv):
#     mng_client = pymongo.MongoClient('localhost', 27017)
#     mng_db = mng_client['PC_Bankruptcy09_11'] // Replace mongo db name
#     collection_name = 'CombinedData' // Replace mongo db collection name
#     db_cm = mng_db[CombinedData]
#     cdir = os.path.dirname(__file__)
#     file_res = os.path.join(cdir, filepath)

#     data = pd.read_csv(file_res)
#     data_json = json.loads(data.to_json(orient='records'))
#     db_cm.remove()
#     db_cm.insert(data_json)

# if __name__ == "__main__":
#   filepath = 'bankruptcy_data/combined_data.csv'  // pass csv file path
#   import_content(filepath)

# New heading

# New heading