# Pandas

In [82]:
from io import StringIO
from pathlib import Path

import numpy as np
import pandas as pd

In [83]:
# Demo frame: the integers 0..19 laid out row by row in a 5x4 grid,
# with explicit string labels on both axes.
df = pd.DataFrame(
    data=np.arange(20).reshape(5, 4),
    index=['row1', 'row2', 'row3', 'row4', 'row5'],
    columns=['col1', 'col2', 'col3', 'col4'],
)

In [84]:
# Preview the first rows (head() shows at most 5 by default; this frame has exactly 5).
df.head()

Unnamed: 0,col1,col2,col3,col4
row1,0,1,2,3
row2,4,5,6,7
row3,8,9,10,11
row4,12,13,14,15
row5,16,17,18,19


In [85]:
# Label-based lookup: a single row label returns that row as a Series indexed by column name.
df.loc['row1']

col1    0
col2    1
col3    2
col4    3
Name: row1, dtype: int64

In [86]:
# Indexing with a list of column names returns a DataFrame holding just those columns.
df[['col1', 'col2']]

Unnamed: 0,col1,col2
row1,0,1
row2,4,5
row3,8,9
row4,12,13
row5,16,17


In [87]:
# Position-based indexing: a full slice on both axes selects every row and every column.
df.iloc[:, :]

Unnamed: 0,col1,col2,col3,col4
row1,0,1,2,3
row2,4,5,6,7
row3,8,9,10,11
row4,12,13,14,15
row5,16,17,18,19


In [88]:
# Selecting a single column by position yields a Series rather than a DataFrame.
type(df.iloc[:, 0])

pandas.core.series.Series

In [89]:
# Missing values per column: isnull() gives a boolean frame and sum() counts the True entries column-wise.
df.isnull().sum()

col1    0
col2    0
col3    0
col4    0
dtype: int64

In [90]:
# How often each distinct value occurs in col1.
df['col1'].value_counts()

col1
0     1
4     1
8     1
12    1
16    1
Name: count, dtype: int64

In [91]:
# Make the cell reproducible on a fresh kernel: nothing else in the notebook
# creates Test1.csv, so write the current frame out when the file is missing.
# The write is guarded so that re-running the cell (after `df` has been
# replaced by the re-read frame) does not overwrite the file with a different
# layout, and an existing file is never clobbered.
csv_path = Path('Test1.csv')
if not csv_path.exists():
    # to_csv also writes the row labels, as a first column with an empty header.
    df.to_csv(csv_path)

# Reading back without index_col keeps that first column as ordinary data
# (pandas names it 'Unnamed: 0') and assigns a default integer index.
df = pd.read_csv(csv_path)

In [92]:
# Note the 'Unnamed: 0' column: the file's first column has no header name, so pandas generates one.
df.head()

Unnamed: 0.1,Unnamed: 0,col1,col2,col3,col4
0,row1,0,1,2,3
1,row2,4,5,6,7
2,row3,8,9,10,11
3,row4,12,13,14,15
4,row5,16,17,18,19


In [93]:
# Structural summary: index range, per-column dtype and non-null count, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  5 non-null      object
 1   col1        5 non-null      int64 
 2   col2        5 non-null      int64 
 3   col3        5 non-null      int64 
 4   col4        5 non-null      int64 
dtypes: int64(4), object(1)
memory usage: 328.0+ bytes


In [94]:
# Summary statistics; only the numeric columns appear (the object-typed 'Unnamed: 0' column is left out).
df.describe()

Unnamed: 0,col1,col2,col3,col4
count,5.0,5.0,5.0,5.0
mean,8.0,9.0,10.0,11.0
std,6.324555,6.324555,6.324555,6.324555
min,0.0,1.0,2.0,3.0
25%,4.0,5.0,6.0,7.0
50%,8.0,9.0,10.0,11.0
75%,12.0,13.0,14.0,15.0
max,16.0,17.0,18.0,19.0


# CSV File Handling

In [95]:
from io import StringIO

In [96]:
# Inline CSV text used by the read_csv examples that follow.
# There is deliberately no space after the commas: read_csv keeps whitespace
# that follows a delimiter as part of the field, so a column parsed with
# dtype=str would otherwise hold ' 7' instead of '7'.
data = (
    'col1,col2,col3\n'
    '5,6,7\n'
    '1,2,3\n'
    '4,5,6\n'
)

In [97]:
# The CSV content is held in a plain Python string, not a file.
type(data)

str

In [98]:
# Wrap the string in StringIO so read_csv receives a file-like object instead of treating it as a path.
pd.read_csv(StringIO(data))

Unnamed: 0,col1,col2,col3
0,5,6,7
1,1,2,3
2,4,5,6


In [99]:
# usecols restricts parsing to the named columns; col2 is skipped entirely.
df = pd.read_csv(
    StringIO(data),
    usecols=['col1', 'col3'],
)

In [100]:
# Only the columns requested through usecols (col1 and col3) are present.
df

Unnamed: 0,col1,col3
0,5,7
1,1,3
2,4,6


In [101]:
# dtype accepts a column -> type mapping, so each column can be parsed as a different type.
df = pd.read_csv(
    StringIO(data),
    dtype={'col1': int, 'col2': float, 'col3': str},
)

In [102]:
# col2 is displayed with a decimal point (6.0, 2.0, 5.0) because it was read as float.
df

Unnamed: 0,col1,col2,col3
0,5,6.0,7
1,1,2.0,3
2,4,5.0,6


In [103]:
import pandas as pd
from io import StringIO

In [104]:
# Second inline CSV sample: a header line plus two records, with no trailing newline.
data2 = '\n'.join([
    'a,b,c',
    '4,apple,10',
    '8,mango,20',
])

In [105]:
# index_col=False: do not use the first column as the index; a default integer index is created instead.
pd.read_csv(StringIO(data2), index_col=False)

Unnamed: 0,a,b,c
0,4,apple,10
1,8,mango,20


In [106]:
# Load the wine data straight from its URL. header=None makes pandas number the
# columns instead of taking names from the first line of the file.
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
    header=None,
)

In [107]:
# Columns are labelled 0..13 because the file was read with header=None.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


# JSON File Handling

In [108]:
# JSON text holding an array of two (identical) records; each record carries a
# nested "list" array of objects. Split across two literals, one per record.
data = (
    '[{"name":"Tony Stark", "email":"tony@gmail.com", "list":[{"title":"IRONMAN","job":"Avengers"}]}, '
    '{"name":"Tony Stark", "email":"tony@gmail.com", "list":[{"title":"IRONMAN","job":"Avengers"}]}]'
)

In [109]:
# Parse the JSON text into a frame; the string goes through a file-like buffer
# rather than being handed to read_json directly.
json_buffer = StringIO(data)
d1 = pd.read_json(json_buffer)

In [110]:
# The nested "list" field is not flattened: each cell holds a Python list of dicts.
d1

Unnamed: 0,name,email,list
0,Tony Stark,tony@gmail.com,"[{'title': 'IRONMAN', 'job': 'Avengers'}]"
1,Tony Stark,tony@gmail.com,"[{'title': 'IRONMAN', 'job': 'Avengers'}]"


In [111]:
# Default DataFrame serialisation is column-oriented: {column: {index: value}}.
d1.to_json()

'{"name":{"0":"Tony Stark","1":"Tony Stark"},"email":{"0":"tony@gmail.com","1":"tony@gmail.com"},"list":{"0":[{"title":"IRONMAN","job":"Avengers"}],"1":[{"title":"IRONMAN","job":"Avengers"}]}}'

In [112]:
# orient='records' emits a JSON array with one object per row (the index is not included).
d1.to_json(orient='records')

'[{"name":"Tony Stark","email":"tony@gmail.com","list":[{"title":"IRONMAN","job":"Avengers"}]},{"name":"Tony Stark","email":"tony@gmail.com","list":[{"title":"IRONMAN","job":"Avengers"}]}]'

# HTML Table Scraping

In [113]:
# Page whose HTML tables are scraped with read_html in the following cells.
url = "https://fdic.gov/bank-failures/failed-bank-list"

In [114]:
# read_html returns a list of DataFrames (one per table it parses), not a single frame.
type(pd.read_html(url))

list

In [115]:
# Take the first table parsed from the page.
# NOTE(review): this downloads and parses the page a second time; the previous cell
# already called read_html(url). Consider storing the list once and indexing it.
pd.read_html(url)[0]

Unnamed: 0,Bank Name,City,State,Cert,Acquiring Institution,Closing Date,Fund Sort ascending
0,Pulaski Savings Bank,Chicago,Illinois,28611,Millennium Bank,"January 17, 2025",10548
1,The First National Bank of Lindsay,Lindsay,Oklahoma,4134,"First Bank & Trust Co., Duncan, OK","October 18, 2024",10547
2,Republic First Bank dba Republic Bank,Philadelphia,Pennsylvania,27332,"Fulton Bank, National Association","April 26, 2024",10546
3,Citizens Bank,Sac City,Iowa,8758,Iowa Trust & Savings Bank,"November 3, 2023",10545
4,Heartland Tri-State Bank,Elkhart,Kansas,25851,"Dream First Bank, N.A.","July 28, 2023",10544
5,First Republic Bank,San Francisco,California,59017,"JPMorgan Chase Bank, N.A.","May 1, 2023",10543
6,Signature Bank,New York,New York,57053,"Flagstar Bank, N.A.","March 12, 2023",10540
7,Silicon Valley Bank,Santa Clara,California,24735,First Citizens Bank & Trust Company,"March 10, 2023",10539
8,Almena State Bank,Almena,Kansas,15426,Equity Bank,"October 23, 2020",10538
9,First City Bank of Florida,Fort Walton Beach,Florida,16748,"United Fidelity Bank, fsb","October 16, 2020",10537


In [116]:
# Wikipedia article containing the mobile country code tables.
country_code_url = "https://en.wikipedia.org/wiki/Mobile_country_code"

In [117]:
# Keep only the tables whose text matches 'Country', and take the column
# names from row 0 of each table.
code = pd.read_html(
    country_code_url,
    match='Country',
    header=0,
)

In [118]:
# First of the tables that matched 'Country'.
code[0]

Unnamed: 0,Mobile country code,Country,ISO 3166,Mobile network codes,National MNC authority,Remarks
0,289,A Abkhazia,GE-AB,List of mobile network codes in Abkhazia,,MCC is not listed by ITU
1,412,Afghanistan,AF,List of mobile network codes in Afghanistan,,
2,276,Albania,AL,List of mobile network codes in Albania,,
3,603,Algeria,DZ,List of mobile network codes in Algeria,,
4,544,American Samoa (United States of America),AS,List of mobile network codes in American Samoa,,
...,...,...,...,...,...,...
247,452,Vietnam,VN,List of mobile network codes in the Vietnam,,
248,543,W Wallis and Futuna,WF,List of mobile network codes in Wallis and Futuna,,
249,421,Y Yemen,YE,List of mobile network codes in the Yemen,,
250,645,Z Zambia,ZM,List of mobile network codes in Zambia,,


In [119]:
# Typing-test export to analyse. The path is relative, so it is resolved against
# the kernel's current working directory; edit it here if the file lives elsewhere.
typing_results_path = Path('Downloads') / 'results.csv'

# Fail early with the fully resolved location so a wrong working directory is
# obvious from the message (same exception type that read_csv would raise).
if not typing_results_path.is_file():
    raise FileNotFoundError(
        f"Typing results CSV not found at '{typing_results_path.resolve()}'. "
        "Start the kernel from the directory that contains 'Downloads/' "
        "or update typing_results_path."
    )

# Only the three metrics used below are loaded.
typing_data = pd.read_csv(
    typing_results_path,
    usecols=['wpm', 'acc', 'consistency'],
)

FileNotFoundError: [Errno 2] No such file or directory: 'Downloads/results.csv'

In [None]:
# Display the loaded results; pandas elides the middle rows of a long frame.
typing_data

Unnamed: 0,wpm,acc,consistency
0,92.60,95.22,78.29
1,74.79,91.67,65.98
2,91.19,95.83,79.59
3,87.18,92.00,81.58
4,89.58,94.96,75.88
...,...,...,...
1009,110.38,100.00,85.58
1010,91.19,95.80,71.09
1011,87.99,96.49,70.08
1012,90.39,95.16,74.45


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for wpm, acc and consistency.
typing_data.describe()

Unnamed: 0,wpm,acc,consistency
count,1014.0,1014.0,1014.0
mean,80.735207,92.978728,70.992495
std,11.497605,3.733579,7.831737
min,44.39,77.05,30.23
25%,73.5925,91.0025,66.6975
50%,80.72,93.45,71.485
75%,87.93,95.4975,75.88
max,119.96,100.0,90.22


In [None]:
# Structural summary: row count, per-column dtype and non-null count, and memory usage.
typing_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1014 entries, 0 to 1013
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   wpm          1014 non-null   float64
 1   acc          1014 non-null   float64
 2   consistency  1014 non-null   float64
dtypes: float64(3)
memory usage: 23.9 KB


In [None]:
# .T returns the transpose: the three metric columns become rows and each record becomes a column.
typing_data.T  # transposes the data

NameError: name 'typing_data' is not defined

In [None]:
# Number of distinct values in the wpm column.
typing_data['wpm'].nunique()

NameError: name 'typing_data' is not defined

In [None]:
# The distinct wpm values themselves, as an array.
typing_data['wpm'].unique()

NameError: name 'typing_data' is not defined