### Reading Data From Different Sources

In [3]:
import pandas as pd
from io import StringIO

Data = '{"employee_name" : "Suk", "email" : "suk@gmail.com", "job_profile" : [{"title1" : "Team Lead", "title2" : "Developer"}]}'
df = pd.read_json(StringIO(Data))
print(df)

  employee_name          email                                     job_profile
0           Suk  suk@gmail.com  {'title1': 'Team Lead', 'title2': 'Developer'}


In [4]:
df

Unnamed: 0,employee_name,email,job_profile
0,Suk,suk@gmail.com,"{'title1': 'Team Lead', 'title2': 'Developer'}"


In [5]:
df.to_json()

'{"employee_name":{"0":"Suk"},"email":{"0":"suk@gmail.com"},"job_profile":{"0":{"title1":"Team Lead","title2":"Developer"}}}'

In [6]:
# Converts DataFrame to JSON format
# 'orient=index' → each row index becomes a key, and its value is that row’s data as a dictionary
json_data = df.to_json(orient='index')

In [7]:
# Converts DataFrame to JSON format
# 'orient=records' → each row becomes a separate dictionary (list of row dictionaries)
json_data = df.to_json(orient='records')

In [10]:
df = pd.read_csv("https://dog.ceo/api/breeds/list/all", header = None)

In [11]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,156,157,158,159,160,161,162,163,164,165
0,"{""message"":{""affenpinscher"":[]","african:[""wild""]",airedale:[],akita:[],appenzeller:[],"australian:[""kelpie""",shepherd],"bakharwal:[""indian""]",basenji:[],beagle:[],...,westhighland,wheaten,yorkshire],tervuren:[],vizsla:[],"waterdog:[""spanish""]",weimaraner:[],whippet:[],"wolfhound:[""irish""]}","status:""success""}"


In [12]:
df.to_csv("wine.csv")

In [15]:
!pip install lxml



In [17]:
!pip install html5lib
!pip install beautifulsoup4

Collecting beautifulsoup4
  Downloading beautifulsoup4-4.14.2-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4)
  Downloading soupsieve-2.8-py3-none-any.whl.metadata (4.6 kB)
Downloading beautifulsoup4-4.14.2-py3-none-any.whl (106 kB)
Downloading soupsieve-2.8-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4

   ---------------------------------------- 0/2 [soupsieve]
   ---------------------------------------- 0/2 [soupsieve]
   ---------------------------------------- 0/2 [soupsieve]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   ---------------------------------------- 2/2 [beautifulsoup4]

Successfully installed beautifulsoup4-4.14.2 soupsiev

In [20]:
url = "https://www.w3schools.com/html/html_tables.asp"

df = pd.read_html(url)

In [22]:
df[0]

Unnamed: 0,Company,Contact,Country
0,Alfreds Futterkiste,Maria Anders,Germany
1,Centro comercial Moctezuma,Francisco Chang,Mexico
2,Ernst Handel,Roland Mendel,Austria
3,Island Trading,Helen Bennett,UK
4,Laughing Bacchus Winecellars,Yoshi Tannamuri,Canada
5,Magazzini Alimentari Riuniti,Giovanni Rovelli,Italy


In [28]:
url = "https://www.basketball-reference.com/leagues/NBA_2023_standings.html"

pd.read_html(url, match="Eastern Conference", header=0)[0]

Unnamed: 0,Eastern Conference,W,L,W/L%,GB,PS/G,PA/G,SRS
0,Milwaukee Bucks*,58,24,0.707,—,116.9,113.3,3.61
1,Boston Celtics*,57,25,0.695,1.0,117.9,111.4,6.38
2,Philadelphia 76ers*,54,28,0.659,4.0,115.2,110.9,4.37
3,Cleveland Cavaliers*,51,31,0.622,7.0,112.3,106.9,5.23
4,New York Knicks*,47,35,0.573,11.0,116.0,113.1,2.99
5,Brooklyn Nets*,45,37,0.549,13.0,113.4,112.5,1.03
6,Miami Heat*,44,38,0.537,14.0,109.5,109.8,-0.13
7,Atlanta Hawks*,41,41,0.5,17.0,118.4,118.1,0.32
8,Toronto Raptors,41,41,0.5,17.0,112.9,111.4,1.59
9,Chicago Bulls,40,42,0.488,18.0,113.1,111.8,1.37


In [None]:
# To read excel files, we may need to install openpyxl or xlrd
# These libraries help Python open .xlsx files.

# !pip install openpyxl
# This command installs the 'openpyxl' library (used by pandas to read Excel files).

df_excel = pd.read_excel('data.xlsx')
# This reads the Excel file named 'data.xlsx' and stores the data in a DataFrame called df_excel.

df_excel.to_pickle('df_excel')
# This saves the DataFrame into a pickle file named 'df_excel'.
# Pickle is a fast binary format used to save Python objects.

pd.read_pickle('df_excel')
# This loads the saved pickle file back into Python and returns the stored DataFrame.