# Codebook  
**Authors:** Patrick Guo  
Documents the existing DaanMatch data files, recording each file's location, owner, "version", source, etc.

In [1]:
import boto3
import pandas as pd
import numpy as np 
import os
from collections import Counter
import matplotlib.pyplot as plt
import statistics

In [2]:
# S3 handles used by the notebook: a boto3 resource, a Bucket object
# for 'my-bucket' obtained from that resource, and a boto3 client.
resource = boto3.resource('s3')
my_bucket = resource.Bucket('my-bucket')
client = boto3.client('s3')

# Andhra_Pradesh_Gram_Panchayat

## TOC:
* [About this dataset](#1)
* [What's in this dataset?](#2)
* [Codebook](#3)
* [Columns](#4)
    * [S. No.](#4.1)
    * [District](#4.2)
    * [Mandal](#4.3)
    * [Panchayat](#4.4)
    * [Gram Panchayat Special officer Name](#4.5)
    * [Mobile Number](#4.6)
    * [Address for Communication](#4.7)

**About this dataset**  <a class="anchor" id="1"></a>  
Data provided by: The Ministry of Panchayati Raj  
Source: https://www.panchayat.gov.in/documents/20126/75657/Andhra+Pradesh+Gram+Panchayat.xlsx/e1b39a3d-d726-fd91-68c8-3d7cd42755a4?t=1556532428214  
Last modified: May 29, 2021, 19:53:36 (UTC-07:00)  
Size: 894.3 KB 

In [17]:
# S3 location of the source workbook.
path = "s3://daanmatchdatafiles/DaanMatch_DataFiles/Andhra Pradesh Gram Panchayat.xlsx"
# Open the workbook and list the sheets it contains.
xl = pd.ExcelFile(path)
print(xl.sheet_names)
# Parse the "Gram Panchayat" sheet into a DataFrame and preview the first rows.
AndhraPradeshGramPanchayat = xl.parse(sheet_name="Gram Panchayat")
AndhraPradeshGramPanchayat.head(n=5)

['Gram Panchayat']


Unnamed: 0,S. No.,District,Mandal,Panchayat,Gram Panchayat Special officer Name,Mobile Number,Address for Communication
0,1,Ananthapur,Ananthapur,A.Narayanapuram,Anwar Basha,9496189000.0,Sarpanch\nA.Narayanapuram Gram Panchayat \nAt-...
1,2,Ananthapur,Ananthapur,Akuthotapalli,C Obulamma,9490610000.0,Sarpanch\nAkuthotapalli Gram Panchayat \nAt- A...
2,3,Ananthapur,Ananthapur,Alamuru,SURESH,8790998000.0,Sarpanch\nAlamuru Gram Panchayat \nAt- Alamuru...
3,4,Ananthapur,Ananthapur,Anantapur(R),H M Basha,9703327000.0,Sarpanch\nAnantapur(R) Gram Panchayat \nAt- An...
4,5,Ananthapur,Ananthapur,Ankampalli,P Venkatesh Kumar,9985021000.0,Sarpanch\nAnkampalli Gram Panchayat \nAt- Anka...


**What's in this dataset?** <a class="anchor" id="2"></a>

In [13]:
# Report the dimensions of the loaded table: (rows, columns), then each separately.
n_rows, n_cols = AndhraPradeshGramPanchayat.shape
print("Shape:", (n_rows, n_cols))
print("Rows:", n_rows)
print("Columns:", n_cols)
print("Each row is a Gram Panchayat.")

Shape: (12918, 7)
Rows: 12918
Columns: 7
Each row is a Gram Panchayat.


In [15]:
# Collect the column names of the loaded sheet as a plain list.
# The DataFrame is bound to `AndhraPradeshGramPanchayat` where the sheet is
# parsed; the previous `AndhraPradeshGramPanchayat_df` name is not defined in
# this notebook and raises NameError on a fresh kernel.
AndhraPradeshGramPanchayat_columns = list(AndhraPradeshGramPanchayat.columns)
AndhraPradeshGramPanchayat_columns

['S. No.',
 'District',
 'Mandal',
 'Panchayat',
 'Gram Panchayat Special officer Name',
 'Mobile Number',
 'Address for Communication']

**Codebook** <a class="anchor" id="3"></a>

In [23]:
# Build the codebook: one row per column of the dataset, giving its name,
# a hand-written description, and the dtype pandas inferred for it.
# The DataFrame is bound to `AndhraPradeshGramPanchayat` where the sheet is
# parsed; the previous `AndhraPradeshGramPanchayat_df` name is not defined in
# this notebook and raises NameError on a fresh kernel.
AndhraPradeshGramPanchayat_columns = list(AndhraPradeshGramPanchayat.columns)
# Descriptions are positional: entry i describes column i above.
AndhraPradeshGramPanchayat_description = ["Index",
                                          "District name",
                                          "Mandal (local government area)", 
                                          "Panchayat (village)", 
                                          "Name of Gram Panchayat special officer", 
                                          "Mobile Number", 
                                          "Communication Address"]
AndhraPradeshGramPanchayat_dtypes = list(AndhraPradeshGramPanchayat.dtypes)

# Assemble the three parallel lists into a single table and display it.
data = {"Column Name": AndhraPradeshGramPanchayat_columns, "Description": AndhraPradeshGramPanchayat_description, "Type": AndhraPradeshGramPanchayat_dtypes}
AndhraPradeshGramPanchayat_codebook = pd.DataFrame(data)
AndhraPradeshGramPanchayat_codebook

Unnamed: 0,Column Name,Description,Type
0,S. No.,Index,int64
1,District,District name,object
2,Mandal,Mandal (local government area),object
3,Panchayat,Panchayat (village),object
4,Gram Panchayat Special officer Name,Name of Gram Panchayat special officer,object
5,Mobile Number,Mobile Number,float64
6,Address for Communication,Communication Address,object


## Columns
<a class="anchor" id="4"></a>

In [38]:
# Count missing values in each column.
AndhraPradeshGramPanchayat.isna().sum()

S. No.                                  0
District                                0
Mandal                                  0
Panchayat                               0
Gram Panchayat Special officer Name    17
Mobile Number                          21
Address for Communication               0
dtype: int64

### S. No.
<a class="anchor" id="4.1"></a>
Index

In [25]:
# Select the "S. No." column for inspection and display it.
column = AndhraPradeshGramPanchayat.loc[:, "S. No."]
column

0            1
1            2
2            3
3            4
4            5
         ...  
12913    12914
12914    12915
12915    12916
12916    12917
12917    12918
Name: S. No., Length: 12918, dtype: int64

In [26]:
# Tally how often each value occurs in the current column, then keep only
# the values that occur more than once.
counter = dict(Counter(column))
duplicates = {entry: occurrences for entry, occurrences in counter.items() if occurrences >= 2}
print("Duplicates:", duplicates)

Duplicates: {}


### District
<a class="anchor" id="4.2"></a>
District name

In [28]:
# Select the "District" column for inspection and display it.
column = AndhraPradeshGramPanchayat.loc[:, "District"]
column

0          Ananthapur
1          Ananthapur
2          Ananthapur
3          Ananthapur
4          Ananthapur
             ...     
12913    YSR District
12914    YSR District
12915    YSR District
12916    YSR District
12917    YSR District
Name: District, Length: 12918, dtype: object

In [29]:
# Distinct values present in the current column.
print("Unique values:", column.unique())

# How many entries are exactly a single space (" ").
print("Empty strings:", sum(column.eq(" ")))

# Frequency table: one row per distinct value, with a " " key relabelled "Empty".
counter = dict(Counter(column))
count = {label: [occurrences] for label, occurrences in counter.items()}
table = (
    pd.DataFrame.from_dict(count)
    .rename(columns={" ": "Empty"})
    .melt(var_name="Class", value_name="Count")
)
table

Unique values: ['Ananthapur' 'Chittoor' 'East Godavari' 'Guntur' 'Krishna' 'Kurnool'
 'Prakasam' 'SPSR Nellore' 'Srikakulam' 'Visakhapatnam' 'Vizianagaram'
 'West Godavari' 'YSR District']
Empty strings: 0


Unnamed: 0,Class,Count
0,Ananthapur,1003
1,Chittoor,1363
2,East Godavari,1069
3,Guntur,1011
4,Krishna,970
5,Kurnool,889
6,Prakasam,1028
7,SPSR Nellore,940
8,Srikakulam,1100
9,Visakhapatnam,925


### Mandal
<a class="anchor" id="4.3"></a>
Mandal (local government area)

### Panchayat
<a class="anchor" id="4.4"></a>
Panchayat (village)

### Gram Panchayat Special officer Name
<a class="anchor" id="4.5"></a>
Name of Gram Panchayat special officer

### Mobile Number
<a class="anchor" id="4.6"></a>
Mobile Number

### Address for Communication
<a class="anchor" id="4.7"></a>
Communication Address