# Codebook  
**Authors:** Lauren Baker   
Documenting existing data files of DaanMatch with information about location, owner, "version", source etc.

In [1]:
import boto3
import numpy as np 
import pandas as pd
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
from collections import Counter
import statistics

In [2]:
client = boto3.client('s3')
resource = boto3.resource('s3')
my_bucket = resource.Bucket('daanmatchdatafiles')

# CSR Spent 17-18.xlsx

## TOC:
* [About this dataset](#1)
* [What's in this dataset](#2)
* [Codebook](#3)
    * [Missing values](#3.1)
    * [Summary statistics](#3.2)
* [Columns](#4)
    * [CIN](#4.1)
    * [COMPANY_NAME](#4.2)
    * [COMPANY_CLASS](#4.3)
    * [COMPANY_CATEGORY](#4.4)
    * [COMPANY_SUBCAT](#4.5)
    * [COMPANY_STATUS](#4.6)
    * [DATE_OF_REGISTRATION](#4.7)
    * [REGISTERED_STATE](#4.8)
    * [Authorized Capital  (Rs.)](#4.9)
    * [PAIDUP_CAPITAL (Rs.)](#4.10)
    * [PRINCIPAL_BUSINESS_ACTIVITY_CODE](#4.11)
    * [REGISTERED_OFFICE_ADDRESS](#4.12)
    * [EMAIL_ID](#4.13)
    * [LATEST ANNUAL REPORT FILING FY END DATE](#4.14)
    * [LATEST BALANCE SHEET FILING FY END DATE](#4.15)

**About this dataset**  <a class="anchor" id="1"></a>  
Data provided by: Unknown.  
Source: https://daanmatchdatafiles.s3.us-west-1.amazonaws.com/DaanMatch_DataFiles/CSR+Spent+17-18.xlsx  
Type: xlsx  
Last Modified: May 29, 2021, 19:54:24 (UTC-07:00)  
Size: 3.4 MB

In [3]:
path = "s3://daanmatchdatafiles/DaanMatch_DataFiles/CSR Spent 17-18.xlsx"
csr_spent_17_18 = pd.ExcelFile(path)
print(csr_spent_17_18.sheet_names)

['CSR Spent 17-18']


In [4]:
csr_spent_17_18 = csr_spent_17_18.parse('CSR Spent 17-18')
csr_spent_17_18.head()

Unnamed: 0,Url,Company Name,CSR Spent 17-18,Date of Incorporation,Class,RoC,Category,Sub Category,Listing Status,Registered Address,Zipcode,State,Email ID,Paid-up Capital (in INR Cr.),Authorized Capital (in INR Cr.)
0,https://csr.gov.in/companyprofile/dashboard.ph...,Reliance Industries Limited,7450400000,1973-05-08,Public,RoC-Mumbai,Company limited by Shares,Non-govt company,Listed,3 RD FLOORMAKER CHAMBER IV222 NARIMAN POINT 4...,400021.0,Maharashtra,sandeep.deshmukh@ril.com,"₹ 6,333.50","₹ 15,000.00"
1,https://csr.gov.in/companyprofile/dashboard.ph...,Oil And Natural Gas Corporation Limited,4820700000,1993-06-23,Public,RoC-Delhi,Company limited by Shares,Union Govt company,Listed,"Plot No. 5A- 5BNelson Mandela Road,Vasant Kunj...",110070.0,Delhi,secretariat@ongc.co.in,"₹ 6,416.62","₹ 15,000.00"
2,https://csr.gov.in/companyprofile/dashboard.ph...,Tata Consultancy Services Limited,4000000000,1995-01-19,Public,RoC-Mumbai,Company limited by Shares,Non-govt company,Listed,"9TH FLOOR, NIRMAL BUILDINGNARIMAN POINT 400021",400021.0,Maharashtra,rajendra.moholkar@tcs.com,₹ 197.04,₹ 565.08
3,https://csr.gov.in/companyprofile/dashboard.ph...,Hdfc Bank Limited,3745500000,1994-08-30,Public,RoC-Mumbai,Company limited by Shares,Non-govt company,Listed,HDFC BANK HOUSESENAPATI BAPAT MARGLOWER PAREL ...,400013.0,Maharashtra,sanjay.dongre@hdfcbank.com,₹ 517.61,₹ 650.00
4,https://csr.gov.in/companyprofile/dashboard.ph...,Indian Oil Corpn. Limited,3310100000,1959-06-30,Public,RoC-Mumbai,Company limited by Shares,Union Govt company,Listed,"Indian Oil Bhavan, G-9 All Yavar Jung Marg,BAN...",400051.0,Maharashtra,kgwalani@indianoil.in,"₹ 4,855.90","₹ 6,000.00"


**What's in this dataset?** <a class="anchor" id="2"></a>

In [5]:
print("Shape:", csr_spent_17_18.shape)
print("Rows:", csr_spent_17_18.shape[0])
print("Columns:", csr_spent_17_18.shape[1])
print("Each row is a company.")

Shape: (20975, 15)
Rows: 20975
Columns: 15
Each row is a company.


In [6]:
csr_spent_17_18_columns = [column for column in csr_spent_17_18.columns]
csr_spent_17_18_description = ["Corporate Identification Number in India (CIN) is a 21 digit alpha-numeric code issued to companies incorporated within India on being registered with Registrar of Companies (RCA).",
                                           "Name of Company.",
                                           "Class of Company: Private or Public.",
                                           "Category of Company: Limited by Shares, Limited by Guarantee, Unlimited Company.",
                                           "Subcategory of Company: Non-govt, Union Gtvt, State Govt, Subsidiary of Foreign Company, Guarantee and Association Company.",
                                           "Status of Company.",
                                           "Timestamp of date of registration: YYYY-MM-DD HH:MM:SS.",
                                           "State of registration.",
                                           "Authorized capital in rupees (Rs.).",
                                           "Paid up capital in rupees (Rs.).",
                                           "Principal Business code that classifies the main type of product/service sold.",
                                           "Address of registered office.",
                                           "Company email.",
                                           "Latest annual report filing fiscal year end date: YYYY-MM-DD.",
                                           "Latest balance sheet filing fiscal year end date: YYYY-MM-DD."]
csr_spent_17_18_dtypes = [dtype for dtype in csr_spent_17_18.dtypes]

data = {"Column Name": csr_spent_17_18_columns, "Description": csr_spent_17_18_description, "Type": csr_spent_17_18_dtypes}
csr_spent_17_18_codebook = pd.DataFrame(data)
csr_spent_17_18_codebook.style.set_properties(subset=['Description'], **{'width': '600px'})

Unnamed: 0,Column Name,Description,Type
0,Url,Corporate Identification Number in India (CIN) is a 21 digit alpha-numeric code issued to companies incorporated within India on being registered with Registrar of Companies (RCA).,object
1,Company Name,Name of Company.,object
2,CSR Spent 17-18,Class of Company: Private or Public.,int64
3,Date of Incorporation,"Category of Company: Limited by Shares, Limited by Guarantee, Unlimited Company.",object
4,Class,"Subcategory of Company: Non-govt, Union Gtvt, State Govt, Subsidiary of Foreign Company, Guarantee and Association Company.",object
5,RoC,Status of Company.,object
6,Category,Timestamp of date of registration: YYYY-MM-DD HH:MM:SS.,object
7,Sub Category,State of registration.,object
8,Listing Status,Authorized capital in rupees (Rs.).,object
9,Registered Address,Paid up capital in rupees (Rs.).,object
