In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Define paths
import os
zip_file_path = '/content/drive/MyDrive/complaints.csv.zip'
extract_dir = '/content/extracted_files'
os.makedirs(extract_dir, exist_ok=True)



In [3]:
# Extract the zip file
import zipfile
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)



In [4]:

# Read the CSV file from the extracted files
import pandas as pd
csv_file_path = os.path.join(extract_dir, 'complaints.csv')
df = pd.read_csv(csv_file_path)
print(df.head())

  Date received                                            Product  \
0    2024-02-26  Credit reporting or other personal consumer re...   
1    2024-02-26  Credit reporting or other personal consumer re...   
2    2024-02-26  Credit reporting or other personal consumer re...   
3    2024-02-26  Credit reporting or other personal consumer re...   
4    2024-02-26  Credit reporting or other personal consumer re...   

        Sub-product                                 Issue  \
0  Credit reporting           Improper use of your report   
1  Credit reporting  Incorrect information on your report   
2  Credit reporting  Incorrect information on your report   
3  Credit reporting  Incorrect information on your report   
4  Credit reporting           Improper use of your report   

                                       Sub-issue Consumer complaint narrative  \
0  Reporting company used your report improperly                          NaN   
1            Information belongs to someone else  

In [7]:
# Define the path where you want to save the file in Google Drive
output_path = '/content/drive/MyDrive/processed_complaints.csv'

# Save the DataFrame as a CSV file to the specified path in Google Drive
df.to_csv(output_path, index=False)



In [5]:
df.head()

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID
0,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Improper use of your report,Reporting company used your report improperly,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",MA,2115,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8424890
1,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Incorrect information on your report,Information belongs to someone else,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",FL,32953,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8421008
2,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Incorrect information on your report,Information belongs to someone else,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",CA,93280,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8420474
3,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Incorrect information on your report,Information belongs to someone else,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",NJ,8201,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8416259
4,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Improper use of your report,Reporting company used your report improperly,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",TN,38305,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8420946


In [6]:
df.shape

(5358365, 18)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5358365 entries, 0 to 5358364
Data columns (total 18 columns):
 #   Column                        Dtype 
---  ------                        ----- 
 0   Date received                 object
 1   Product                       object
 2   Sub-product                   object
 3   Issue                         object
 4   Sub-issue                     object
 5   Consumer complaint narrative  object
 6   Company public response       object
 7   Company                       object
 8   State                         object
 9   ZIP code                      object
 10  Tags                          object
 11  Consumer consent provided?    object
 12  Submitted via                 object
 13  Date sent to company          object
 14  Company response to consumer  object
 15  Timely response?              object
 16  Consumer disputed?            object
 17  Complaint ID                  int64 
dtypes: int64(1), object(17)
memory usage: 735.

In [8]:
df.isnull().sum()

Date received                         0
Product                               0
Sub-product                      235294
Issue                                 5
Sub-issue                        737797
Consumer complaint narrative    3482998
Company public response         2781194
Company                               0
State                             46005
ZIP code                          30225
Tags                            4867806
Consumer consent provided?      1048510
Submitted via                         0
Date sent to company                  0
Company response to consumer         15
Timely response?                      0
Consumer disputed?              4590049
Complaint ID                          0
dtype: int64

In [11]:
df

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID
0,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Improper use of your report,Reporting company used your report improperly,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",MA,02115,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8424890
1,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Incorrect information on your report,Information belongs to someone else,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",FL,32953,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8421008
2,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Incorrect information on your report,Information belongs to someone else,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",CA,93280,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8420474
3,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Incorrect information on your report,Information belongs to someone else,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",NJ,08201,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8416259
4,2024-02-26,Credit reporting or other personal consumer re...,Credit reporting,Improper use of your report,Reporting company used your report improperly,,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",TN,38305,,Consent not provided,Web,2024-02-26,Closed with non-monetary relief,Yes,,8420946
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5358360,2013-06-04,Credit card,,Billing disputes,,,,"BANK OF AMERICA, NATIONAL ASSOCIATION",OH,43318,,,Web,2013-06-05,Closed with explanation,Yes,No,424184
5358361,2011-12-30,Credit card,,Identity theft / Fraud / Embezzlement,,,,JPMORGAN CHASE & CO.,FL,33436,,,Referral,2011-12-30,Closed without relief,No,No,69708
5358362,2013-04-23,Mortgage,Other mortgage,"Loan servicing, payments, escrow account",,,,"BANKUNITED, NATIONAL ASSOCIATION",FL,34280,,,Referral,2013-04-23,Closed with explanation,Yes,No,390852
5358363,2013-03-05,Mortgage,Conventional fixed mortgage,"Application, originator, mortgage broker",,,,"Prospect Mortgage, LLC",NY,11433,,,Web,2013-03-05,Closed with explanation,Yes,No,341832


In [12]:
import seaborn as sns
import matplotlib.pyplot as plt
