In [1]:
import pandas as pd

## Input and Output - CSV Tables

In [23]:
# Load example.csv with pandas' defaults: the first line of the file supplies
# the column names and a fresh 0..n-1 integer index is generated for the rows.
df = pd.read_csv("example.csv")
df

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [7]:
# header=None tells pandas the file has no header line, so the "a,b,c,d" line
# is read as an ordinary data row and the columns are labelled 0..3 instead.
df = pd.read_csv("example.csv", header=None)
df

Unnamed: 0,0,1,2,3
0,a,b,c,d
1,0,1,2,3
2,4,5,6,7
3,8,9,10,11
4,12,13,14,15


In [9]:
# index_col=0 promotes the first column of the file ("a") to the row index
# instead of keeping it as a regular data column.
# NOTE: this rebinds `df`; any later cell that uses `df` sees this version
# when the notebook is executed top to bottom.
df = pd.read_csv("example.csv", index_col=0)
df

Unnamed: 0_level_0,b,c,d
a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,2,3
4,5,6,7
8,9,10,11
12,13,14,15


In [24]:
# Write the table back out without the row index.
# Reload the frame with the default options first: the preceding cell rebound
# `df` with column "a" as its index, so on a top-to-bottom run `index=False`
# would silently drop "a" from the written file. Reading with the defaults
# keeps all four columns a, b, c, d in newfile.csv.
df = pd.read_csv("example.csv")
df.to_csv("newfile.csv", index=False)

In [25]:
# Read the file that was just written to check the CSV round trip.
new = pd.read_csv("newfile.csv")
new

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


## Input and Output - HTML Tables

In [2]:
# Page whose HTML tables are parsed in this section. It is a live article, so
# the number, order and content of its tables may change between runs.
url = "https://en.wikipedia.org/wiki/World_population"

In [3]:
# read_html downloads the page and returns a list with one DataFrame per
# HTML <table> it could parse (needs an HTML parser such as lxml installed).
tables = pd.read_html(url)

In [4]:
# Number of tables that were parsed from the page.
len(tables)

26

In [5]:
# First parsed table. Its columns form a two-level MultiIndex: the table
# title on the top level and the actual column names underneath.
tables[0]

Unnamed: 0_level_0,"World population (millions, UN estimates)[14]","World population (millions, UN estimates)[14]","World population (millions, UN estimates)[14]","World population (millions, UN estimates)[14]","World population (millions, UN estimates)[14]"
Unnamed: 0_level_1,#,Top ten most populous countries,2000,2015,2030[A]
0,1,China[B],1270,1376,1416
1,2,India,1053,1311,1528
2,3,United States,283,322,356
3,4,Indonesia,212,258,295
4,5,Pakistan,136,208,245
5,6,Brazil,176,206,228
6,7,Nigeria,123,182,263
7,8,Bangladesh,131,161,186
8,9,Russia,146,146,149
9,10,Mexico,103,127,148


In [9]:
# Bind a working name to the first table (a reference to the same object,
# not a copy).
world_topten = tables[0]

In [None]:
# Select everything under the single top-level header; this removes that
# level and leaves the plain column names (#, country, years).
# Indexing tables[0] directly, rather than world_topten itself, keeps the cell
# re-runnable: applying the selection to its own result raises KeyError
# because the top-level label no longer exists after the first run.
world_topten = tables[0]["World population (millions, UN estimates)[14]"]

In [14]:
# Remove the row labelled 11 if the parsed table contains one.
# errors="ignore" turns the drop into a no-op when the label is absent, so the
# cell does not fail with KeyError on a table that only has rows 0-9, nor when
# it is executed a second time.
world_topten = world_topten.drop(index=11, errors="ignore")

In [15]:
# Inspect the table after the row drop.
world_topten

Unnamed: 0,#,Top ten most populous countries,2000,2015,2030[A]
0,1.0,China[B],1270,1376,1416
1,2.0,India,1053,1311,1528
2,3.0,United States,283,322,356
3,4.0,Indonesia,212,258,295
4,5.0,Pakistan,136,208,245
5,6.0,Brazil,176,206,228
6,7.0,Nigeria,123,182,263
7,8.0,Bangladesh,131,161,186
8,9.0,Russia,146,146,149
9,10.0,Mexico,103,127,148


In [17]:
# Discard the "#" rank column; the row order already conveys the ranking.
# errors="ignore" keeps the cell re-runnable: once the column is gone, a plain
# drop would raise KeyError on the second execution.
world_topten = world_topten.drop(columns="#", errors="ignore")

In [18]:
# Inspect the table after removing the "#" column.
world_topten

Unnamed: 0,Top ten most populous countries,2000,2015,2030[A]
0,China[B],1270,1376,1416
1,India,1053,1311,1528
2,United States,283,322,356
3,Indonesia,212,258,295
4,Pakistan,136,208,245
5,Brazil,176,206,228
6,Nigeria,123,182,263
7,Bangladesh,131,161,186
8,Russia,146,146,149
9,Mexico,103,127,148


In [19]:
# Replace the scraped headers with short labels. The assignment is positional
# and the list must contain exactly one entry per column (four here),
# otherwise pandas raises a length-mismatch ValueError.
world_topten.columns = ["Country","2000","2015","2030 Est."]

In [20]:
# Inspect the table with its new column labels.
world_topten

Unnamed: 0,Country,2000,2015,2030 Est.
0,China[B],1270,1376,1416
1,India,1053,1311,1528
2,United States,283,322,356
3,Indonesia,212,258,295
4,Pakistan,136,208,245
5,Brazil,176,206,228
6,Nigeria,123,182,263
7,Bangladesh,131,161,186
8,Russia,146,146,149
9,Mexico,103,127,148


In [22]:
# Show the seventh parsed table with its "Rank" column as the row index.
# set_index returns a new frame; tables[6] itself is left unchanged.
# NOTE(review): position 6 depends on the page's current layout - confirm it
# still points at the intended table before relying on it.
tables[6].set_index("Rank")

Unnamed: 0_level_0,Country,Population,Area(km2),Density(pop/km2),Population trend
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,India,1374990000,3287240,418,Growing
2,Pakistan,223100000,803940,278,Growing
3,Bangladesh,170390000,143998,1183,Rapidly growing
4,Japan,126010000,377873,333,Declining[98]
5,Philippines,110010000,300000,367,Growing
6,Vietnam,96209000,331689,290,Growing
7,United Kingdom,66436000,243610,273,Steady
8,South Korea,51781000,99538,520,Steady
9,Taiwan,23604000,36193,652,Steady
10,Sri Lanka,21803000,65610,332,Growing


In [23]:
# Export the cleaned table as an HTML <table> file; index=False leaves the
# 0..n row labels out of the markup.
world_topten.to_html("Sample_table.html", index=False)

## Input and Output - Excel Files

In [27]:
# Read a single worksheet, selected by name, into a DataFrame.
df = pd.read_excel("my_excel_file.xlsx", sheet_name="First_Sheet")

In [28]:
# Display the worksheet contents.
df

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [29]:
# Open the workbook as an ExcelFile object so that its metadata (for example
# the sheet names) can be inspected.
# NOTE: the underlying file stays open until wb.close() is called.
wb = pd.ExcelFile("my_excel_file.xlsx")

In [30]:
# List the names of the worksheets contained in the workbook.
wb.sheet_names

['First_Sheet']

In [31]:
# sheet_name=None loads every worksheet at once and returns a dict that maps
# each sheet name to its DataFrame.
excel_sheet_dict = pd.read_excel("my_excel_file.xlsx", sheet_name=None)

In [32]:
# Confirm that the result is a plain dict rather than a single DataFrame.
type(excel_sheet_dict)

dict

In [33]:
# The dict keys are the worksheet names.
excel_sheet_dict.keys()

dict_keys(['First_Sheet'])

In [34]:
# Show the whole mapping. A dict has no rich table rendering of its own, so
# the frames inside it are printed as plain text.
excel_sheet_dict

{'First_Sheet':     a   b   c   d
 0   0   1   2   3
 1   4   5   6   7
 2   8   9  10  11
 3  12  13  14  15}

In [36]:
# Write the frame to a new workbook. index=False is not passed here, so the
# row index is written as the first column of the sheet.
df.to_excel("example.xlsx")

## Input and Output - SQL Databases

In [39]:
import numpy as np

In [37]:
from sqlalchemy import create_engine

In [38]:
# SQLAlchemy engine backed by an in-memory SQLite database: nothing is written
# to disk and the contents do not persist beyond this session.
temp_db = create_engine("sqlite:///:memory:")

In [43]:
# Build a 4x4 frame of random integers in [0, 100) with columns a-d to serve as
# demo data for the SQL round trip. No seed is set in the visible cells, so the
# values differ from run to run.
df = pd.DataFrame(
    np.random.randint(low=0, high=100, size=(4, 4)),
    columns=list("abcd"),
)

In [44]:
# Display the randomly generated frame.
df

Unnamed: 0,a,b,c,d
0,24,21,67,92
1,58,29,24,81
2,19,43,47,31
3,19,94,36,28


In [45]:
# Store the frame as table "new_table". The row index is written as well, as a
# column named "index".
# if_exists="replace" lets the cell be executed again: with the default
# ("fail") a second run raises ValueError because the table already exists.
df.to_sql(name="new_table", con=temp_db, if_exists="replace")

In [46]:
# Passing a bare table name (instead of a query) makes read_sql load the
# entire table, including the stored "index" column.
new_df = pd.read_sql(sql="new_table", con=temp_db)

In [47]:
# Display the table as read back from the database.
new_df

Unnamed: 0,index,a,b,c,d
0,0,24,21,67,92
1,1,58,29,24,81
2,2,19,43,47,31
3,3,19,94,36,28


In [48]:
# Run an explicit SQL query so that only columns a and c are fetched from
# new_table.
result = pd.read_sql_query(sql="SELECT a,c FROM new_table", con=temp_db)

In [49]:
# Display the query result.
result

Unnamed: 0,a,c
0,24,67
1,58,24
2,19,47
3,19,36
