# Reading data from different sources 

In [3]:
import pandas as pd 
from io import StringIO

# Sample JSON document used by the cells below: one employee record whose
# "job_profile" field is a one-element list holding a nested object.
# Adjacent string literals are joined by Python at compile time, so this is a
# single string.
Data = (
    '{"employee_name": "James", '
    '"email": "james@gmail.com", '
    '"job_profile": [{"title1":"Team Lead", "title2":"Sr. Developer"}]}'
)

# Converting JSON to Dataframe 
- StringIO(Data) converts the string into a file-like object.

- pd.read_json(...) parses it as JSON.

- The result is converted into a pandas DataFrame (df).

In [5]:
# pd.read_json expects a path, URL or file-like object, so the raw JSON text
# is wrapped in an in-memory StringIO buffer before parsing.
df = pd.read_json(path_or_buf=StringIO(Data))

# Bare last expression -> rich display of the resulting frame.
df

Unnamed: 0,employee_name,email,job_profile
0,James,james@gmail.com,"{'title1': 'Team Lead', 'title2': 'Sr. Develop..."


# Converting Dataframe to JSON  

In [None]:
# Serialise the frame back to a JSON string. orient='columns' is what a bare
# df.to_json() uses for a DataFrame; it is spelled out here so the layout
# ({column -> {index -> value}}) is explicit.
df.to_json(orient='columns')

'{"employee_name":{"0":"James"},"email":{"0":"james@gmail.com"},"job_profile":{"0":{"title1":"Team Lead","title2":"Sr. Developer"}}}'

# Converting Dataframe to JSON by index and by rows (records)

In [8]:
# NOTE: the default orient for DataFrame.to_json is 'columns', not 'index'.
#
# orient='index' nests every row under its index label:
#     {index -> {column -> value}}
# A notebook only displays the value of a cell's last expression, so this
# result is printed explicitly; otherwise it would be silently discarded.
print(df.to_json(orient='index'))

# orient='records' drops the index and emits one object per row:
#     [{column -> value}, ...]
df.to_json(orient='records')

'[{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}]'

# Reading csv from a URL 

In [12]:
# Load the UCI wine dataset directly from its URL (requires network access).
# header=None tells pandas not to treat the first line as column names, so the
# columns are labelled with integers instead.
df2 = pd.read_csv(
    filepath_or_buffer='https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
    header=None,
)

# head() returns the first 5 rows by default.
df2.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


# Converting Dataframe to CSV

In [None]:
# Write the wine frame to "wine.csv" in the current working directory.
# With the default index=True the row index is written as an extra, unnamed
# first column.
df2.to_csv(path_or_buf="wine.csv")

# Reading Data from HTML Tables on a Web Page

In [10]:
import pandas as pd 
# FDIC failed-bank list page (requires network access).
url = "https://www.fdic.gov/bank-failures/failed-bank-list"

# pd.read_html returns a *list* of DataFrames, one per <table> element it can
# parse from the page, so df_html is a list rather than a single frame.
df_html = pd.read_html(io=url)

# Display the whole list of parsed tables.
df_html

[                               Bank Name           City         State   Cert  \
 0           The Santa Anna National Bank     Santa Anna         Texas   5520   
 1                   Pulaski Savings Bank        Chicago      Illinois  28611   
 2     The First National Bank of Lindsay        Lindsay      Oklahoma   4134   
 3  Republic First Bank dba Republic Bank   Philadelphia  Pennsylvania  27332   
 4                          Citizens Bank       Sac City          Iowa   8758   
 5               Heartland Tri-State Bank        Elkhart        Kansas  25851   
 6                    First Republic Bank  San Francisco    California  59017   
 7                         Signature Bank       New York      New York  57053   
 8                    Silicon Valley Bank    Santa Clara    California  24735   
 9                      Almena State Bank         Almena        Kansas  15426   
 
                  Acquiring Institution      Closing Date  Fund  Sort ascending  
 0            Coleman Cou

In [20]:
# Wikipedia page listing mobile country codes (requires network access).
url2 = "https://en.wikipedia.org/wiki/Mobile_country_code"

# match="Country" keeps only tables whose text matches that pattern,
# header=0 uses the first table row as column names, and [0] picks the first
# matching table out of the returned list.
df_country_code = pd.read_html(
    io=url2,
    match="Country",
    header=0,
)[0]

df_country_code


Unnamed: 0,Mobile country code,Country,ISO 3166,Mobile network codes,National MNC authority,Remarks
0,289,A Abkhazia,GE-AB,List of mobile network codes in Abkhazia,,MCC is not listed by ITU
1,412,Afghanistan,AF,List of mobile network codes in Afghanistan,,
2,276,Albania,AL,List of mobile network codes in Albania,,
3,603,Algeria,DZ,List of mobile network codes in Algeria,,
4,544,American Samoa (United States of America),AS,List of mobile network codes in American Samoa,,
...,...,...,...,...,...,...
247,452,Vietnam,VN,List of mobile network codes in the Vietnam,,
248,543,W Wallis and Futuna,WF,List of mobile network codes in Wallis and Futuna,,
249,421,Y Yemen,YE,List of mobile network codes in the Yemen,,
250,645,Z Zambia,ZM,List of mobile network codes in Zambia,,


In [None]:
# Read the workbook 'sample_data.xlsx' from the current working directory
# into a DataFrame (the file must already exist next to the notebook).
df_excel = pd.read_excel(io='sample_data.xlsx')

df_excel

Unnamed: 0,Name,Age
0,Samad,20
1,James,21
2,Alice,22


In [23]:
# Serialise the frame with pickle to a file literally named 'df_excel'
# (no extension) in the current working directory.
df_excel.to_pickle(path='df_excel')

In [24]:
# Load the pickled frame back from the 'df_excel' file and display it.
# Caution: unpickling can execute arbitrary code, so only read pickle files
# that come from a trusted source (here, the file written by this notebook).
pd.read_pickle(filepath_or_buffer='df_excel')

Unnamed: 0,Name,Age
0,Samad,20
1,James,21
2,Alice,22
