# Importing data from external sources (CSV, Excel, JSON)

In [1]:
import pandas as pd

In [7]:
# Load the comma-separated file with pandas' default options;
# the first row of the file supplies the column names.
df = pd.read_csv(filepath_or_buffer='./data/data1.csv')
df  # last expression of the cell, so the frame is rendered as a table

Unnamed: 0.1,Unnamed: 0,name,age,height
0,0,Jams,32,109
1,1,John,35,121
2,2,Jane,41,136
3,3,Bob,20,170
4,4,Alice,23,112
5,5,Mike,47,158
6,6,Dan,23,165


- If a CSV file has no header row, `read_csv` still treats the first record as the column names by default. To avoid this, pass `header=None` (the columns are then numbered 0, 1, 2, ...) and, optionally, the `names` parameter to specify the column names yourself.

In [9]:
# header=None: no row of the file is used as a header, so pandas labels
# the columns with integers (0, 1, 2, ...).
df = pd.read_csv(
    './data/data1_1.csv',
    header=None,
)
df

Unnamed: 0,0,1,2,3
0,0,Jams,32,109
1,1,John,35,121
2,2,Jane,41,136
3,3,Bob,20,170
4,4,Alice,23,112
5,5,Mike,47,158
6,6,Dan,23,165


In [12]:
# The file has no header row, so the column labels are supplied explicitly via `names`.
df = pd.read_csv('./data/data1_1.csv', names=['sn', 'name', 'age', 'height'], header=None)
df

Unnamed: 0,sn,name,age,height
0,0,Jams,32,109
1,1,John,35,121
2,2,Jane,41,136
3,3,Bob,20,170
4,4,Alice,23,112
5,5,Mike,47,158
6,6,Dan,23,165


In [14]:
# The fields in this file are separated by tabs, so override the default comma separator.
df = pd.read_csv(filepath_or_buffer='./data/data2.csv', sep='\t')
df

Unnamed: 0.1,Unnamed: 0,name,age,height
0,0,Jams,32,109
1,1,John,35,121
2,2,Jane,41,136
3,3,Bob,20,170
4,4,Alice,23,112
5,5,Mike,47,158
6,6,Dan,23,165


# In pandas
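- axis=1 (or `'columns'`): the operation refers to columns, e.g. `df.drop([...], axis=1)` removes columns
- axis=0 (or `'index'`): the operation refers to rows, e.g. `df.drop([...], axis=0)` removes rows
- For reductions such as `df.sum(axis=0)`, the given axis is the one that is collapsed, so `axis=0` produces one value per column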

In [None]:
# Show `df` without the 'Unnamed: 0' column. axis='columns' is the named alias of axis=1.
# The result is only displayed, not assigned, so `df` itself is left unchanged.
df.drop(labels=['Unnamed: 0'], axis='columns')

Unnamed: 0,name,age,height
0,Jams,32,109
1,John,35,121
2,Jane,41,136
3,Bob,20,170
4,Alice,23,112
5,Mike,47,158
6,Dan,23,165


In [17]:
# Tab-separated file; `delimiter` is read_csv's alias for `sep`.
df = pd.read_csv('./data/data3.csv', delimiter='\t')
df

Unnamed: 0,name,age,height
0,Jams,32,109
1,John,35,121
2,Jane,41,136
3,Bob,20,170
4,Alice,23,112
5,Mike,47,158
6,Dan,23,165


- Install the `openpyxl` dependency to read from Excel (`.xlsx`) files
    ```shell
    pip install openpyxl
    ```

In [22]:
# Read the 'Sheet1' worksheet, then discard the 'Unnamed: 0' column in the same chain.
df = (
    pd.read_excel('./data/data4.xlsx', sheet_name='Sheet1')
    .drop(columns='Unnamed: 0')
)

df

Unnamed: 0,name,age,height
0,Jams,32,109
1,John,35,121
2,Jane,41,136
3,Bob,20,170
4,Alice,23,112
5,Mike,47,158
6,Dan,23,165


In [23]:
# Read the 'Sheet1' worksheet of the workbook as-is.
df = pd.read_excel(io='./data/data5.xlsx', sheet_name='Sheet1')
df

Unnamed: 0,name,age,height
0,Jams,32,109
1,John,35,121
2,Jane,41,136
3,Bob,20,170
4,Alice,23,112
5,Mike,47,158
6,Dan,23,165


In [24]:
# Load the JSON file straight into a DataFrame.
df = pd.read_json(path_or_buf='./data/data6.json')
df

Unnamed: 0,name,age,height
0,Jams,32,109
1,John,35,121
2,Jane,41,136
3,Bob,20,170
4,Alice,23,112
5,Mike,47,158
6,Dan,23,165


In [26]:
# read_json does not flatten nested objects: each record's 'details' value
# stays a dict inside a single column (see the output below).
pd.read_json(path_or_buf='./data/data7.json')

Unnamed: 0,id,name,details
0,1,Alice,"{'age': 30, 'address': {'city': 'New York', 'z..."
1,2,Bob,"{'age': 25, 'address': {'city': 'Los Angeles',..."


In [28]:
import json

# Parse the file with the standard library so the nested structure is kept as
# plain Python lists/dicts (input for pd.json_normalize).
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, which can mis-decode or fail on UTF-8 JSON
# containing non-ASCII characters (e.g. on Windows).
with open('./data/data7.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
data

[{'id': 1,
  'name': 'Alice',
  'details': {'age': 30, 'address': {'city': 'New York', 'zip': '10001'}}},
 {'id': 2,
  'name': 'Bob',
  'details': {'age': 25, 'address': {'city': 'Los Angeles', 'zip': '90001'}}}]

In [29]:
# Flatten the nested dicts into one column per leaf value; nested keys are
# joined with '.', which is json_normalize's default separator.
pd.json_normalize(data=data, sep='.')

Unnamed: 0,id,name,details.age,details.address.city,details.address.zip
0,1,Alice,30,New York,10001
1,2,Bob,25,Los Angeles,90001


In [30]:
# Same flattening, but join nested keys with '_' instead of '.'.
pd.json_normalize(data=data, sep='_')

Unnamed: 0,id,name,details_age,details_address_city,details_address_zip
0,1,Alice,30,New York,10001
1,2,Bob,25,Los Angeles,90001
