# File Handling
- we have seen how to read/write to a text file previously
- we have used context managers to perform file operations
- now let's see how to handle some other file formats in Python

# JSON
-  <b>J</b>ava<b>S</b>cript <b>O</b>bject <b>N</b>otation is a format for structuring data used by web applications to communicate with each other
- Characteristics of JSON
    - Human-readable and writable
    - Lightweight, text-based data interchange format
    - Language independent

## Task-1: Let's create a json file

## Task-2: Import json package

In [1]:
import json

## Task-3: Reading a JSON file into a dictionary

In [6]:
# Read payload.json from disk and parse it into Python objects.
# JSON text is UTF-8, so the encoding is given explicitly instead of relying
# on the platform's default text encoding (which differs between systems).
with open('payload.json', 'r', encoding='utf-8') as json_file:
    json_object = json.load(json_file)

In [7]:
json_object

{'Name': 'Sudheer',
 'Age': '22',
 'Company': 'JP Morgan Chase & Co',
 'skills': [{'name': 'python', 'experience': '3 years'},
  {'name': 'Machine learning', 'experience': '2 years'}]}

In [9]:
print(type(json_object))

<class 'dict'>


In [10]:
json_object['Name']

'Sudheer'

## Task-4 Writing a dictionary to json

In [11]:
# Plain dictionary holding only JSON-compatible values (strings and an int),
# ready to be serialized.
biodata = {"Name": "Sudheer", "Age": 22, "Location": "Hyderabad, India"}

In [13]:
# Serialize the biodata dictionary to biodata.json (the file is created or
# overwritten). The encoding is given explicitly so the file on disk does not
# depend on the platform's default text encoding.
with open('biodata.json', 'w', encoding='utf-8') as json_file:
    json.dump(biodata, json_file)

## Task-5: String to dict and dict to string - useful when an API returns JSON as a string

In [19]:
# A JSON document held in a Python string, as it might arrive from an API.
# Adjacent literals are concatenated by Python into one string.
str_dict = (
    '{"sid":"f685036", '
    '"name": "Sudheer", '
    '"department":"A&R"}'
)

# json.loads ("load string") parses JSON text into Python objects.
dict_obj = json.loads(str_dict)

In [20]:
dict_obj

{'sid': 'f685036', 'name': 'Sudheer', 'department': 'A&R'}

In [21]:
print(type(dict_obj))

<class 'dict'>


In [22]:
# json.dumps ("dump string") serializes the dictionary back into JSON text.
str_obj = json.dumps(dict_obj)

In [23]:
str_obj

'{"sid": "f685036", "name": "Sudheer", "department": "A&R"}'

In [24]:
print(type(str_obj))

<class 'str'>


# XML

# CSV

## Importing csv package

In [27]:
import csv

## Read csv

In [35]:
# Stream example.csv row by row; csv.reader yields each row as a list of
# strings. The csv module expects the file to be opened with newline='' so
# that it can interpret line endings itself, including newlines embedded
# inside quoted fields.
with open('example.csv', 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    for row in csv_reader:
        print(row)

['Name', ' Age', ' Company', ' SID']
['Sudheer', ' 22', ' JPMC', ' F685036']


In [36]:
# Header row followed by the data records, one inner list per CSV line.
# Values are kept exactly as they will be written, leading spaces included.
column_names = ['Name', ' Age', ' Company', ' SID']
rows = [
    ['Sudheer', ' 22', ' JPMC', ' F685036'],
    ['Someone', ' 25', ' JPMC', ' F685037'],
]

In [39]:
# Write the header and then all data rows to sample.csv (created or
# overwritten).
# newline='' is required by the csv module: the writer emits its own '\r\n'
# line terminator, and without it Windows would translate that to '\r\r\n',
# leaving a blank line between rows.
# The explicit encoding keeps the file independent of the platform default.
with open('sample.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(column_names)
    csv_writer.writerows(rows)

# Use Openpyxl for Excel operations
- Everything that a human can do in Excel can be performed by this Excel engine

# Simpler ways to read CSV and Excel files
# Pandas

In [41]:
import pandas as pd

In [42]:
# Parse sample.csv into a DataFrame; the first row supplies the column labels.
dataframe = pd.read_csv('sample.csv')

In [43]:
dataframe.head()

Unnamed: 0,Name,Age,Company,SID
0,Sudheer,22,JPMC,F685036
1,Someone,25,JPMC,F685037


In [45]:
dataframe.columns

Index(['Name', ' Age', ' Company', ' SID'], dtype='object')

In [46]:
dataframe.Name

0    Sudheer
1    Someone
Name: Name, dtype: object

In [48]:
dataframe.iloc[0,0]

'Sudheer'

In [49]:
dataframe.iloc[:1,:2]

Unnamed: 0,Name,Age
0,Sudheer,22


In [50]:
print(dir(pd))

['BooleanDtype', 'Categorical', 'CategoricalDtype', 'CategoricalIndex', 'DataFrame', 'DateOffset', 'DatetimeIndex', 'DatetimeTZDtype', 'ExcelFile', 'ExcelWriter', 'Flags', 'Float32Dtype', 'Float64Dtype', 'Float64Index', 'Grouper', 'HDFStore', 'Index', 'IndexSlice', 'Int16Dtype', 'Int32Dtype', 'Int64Dtype', 'Int64Index', 'Int8Dtype', 'Interval', 'IntervalDtype', 'IntervalIndex', 'MultiIndex', 'NA', 'NaT', 'NamedAgg', 'Period', 'PeriodDtype', 'PeriodIndex', 'RangeIndex', 'Series', 'SparseDtype', 'StringDtype', 'Timedelta', 'TimedeltaIndex', 'Timestamp', 'UInt16Dtype', 'UInt32Dtype', 'UInt64Dtype', 'UInt64Index', 'UInt8Dtype', '__builtins__', '__cached__', '__doc__', '__docformat__', '__file__', '__getattr__', '__git_version__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_hashtable', '_is_numpy_dev', '_lib', '_np_version_under1p17', '_np_version_under1p18', '_testing', '_tslib', '_version', 'api', 'array', 'arrays', 'bdate_range', 'concat', 'core', 'c

# A lot of things can be done with pandas
- Group by, merge, concat, plotting, pivot, reading databases and a lot more
- best used for data analysis