In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.__version__

'0.24.2'

## Multiple ways of creating a series

`Array`
--------

In [3]:
values = np.linspace(0,20,15)

In [4]:
s = pd.Series(values, name = "series1")

In [5]:
s.head()

0    0.000000
1    1.428571
2    2.857143
3    4.285714
4    5.714286
Name: series1, dtype: float64

`List`
--------

In [6]:
values = list(range(0,20,2))

In [7]:
values

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [8]:
s = pd.Series(values, name = "series1")

In [9]:
s.head()

0    0
1    2
2    4
3    6
4    8
Name: series1, dtype: int64

`Using Single Value`
--------------------

In [10]:
s = pd.Series(3, range(10), name = "series2")

In [11]:
s.head()

0    3
1    3
2    3
3    3
4    3
Name: series2, dtype: int64

`Custom Index`
--------------------

In [12]:


s = pd.Series([1,2,3], index = ['a','b','c'], name = "series3")

In [13]:
s.head()

a    1
b    2
c    3
Name: series3, dtype: int64

` Dictionary`
---------------------------------------

In [14]:

values= [1,2,3]
index = ['a','b','c']

In [15]:
dict_ = {x:y for x,y in zip(index, values)}

In [16]:
dict_

{'a': 1, 'b': 2, 'c': 3}

In [17]:
S = pd.Series(dict_)

In [18]:
S

a    1
b    2
c    3
dtype: int64

# Creating Dataframes

` Dictionary`
---------------------------------------

In [20]:
locations = {"Country": ["Japan", "Brazil", "India"],
            "City": ["Tokyo", "Sao Paulo", "Mumbai"]}

In [21]:
df = pd.DataFrame(locations)

In [22]:
df.head()

Unnamed: 0,Country,City
0,Japan,Tokyo
1,Brazil,Sao Paulo
2,India,Mumbai


`Using Zipped list`
---------------------------------------

In [30]:
country =["Japan", "Brazil", "India"]
city =["Tokyo", "Sao Paulo", "Mumbai"]

In [31]:
locations = list(zip(country, city))

In [32]:
locations

[('Japan', 'Tokyo'), ('Brazil', 'Sao Paulo'), ('India', 'Mumbai')]

In [33]:
columns = ['Country', 'City']


In [28]:
df = pd.DataFrame(locations, columns = columns)

In [29]:
df.head()

Unnamed: 0,Country,City
0,Japan,Tokyo
1,Brazil,Sao Paul
2,India,Mumbai


`Using CSV File`
---------------------------------------


In [34]:
#Let's save it to csv file

df.to_csv("country.csv")

In [37]:
df = pd.read_csv("country.csv", index_col = 0)

In [38]:
df.head()

Unnamed: 0,Country,City
0,Japan,Tokyo
1,Brazil,Sao Paul
2,India,Mumbai


In [None]:
Method #4 Excel File

In [39]:
df.to_excel("country.xlsx") # need to install ModuleNotFoundError: No module named 'openpyxl

In [42]:
df = pd.read_excel("country.xlsx", index_col = 0) #xlrd

In [43]:
df.head()

Unnamed: 0,Country,City
0,Japan,Tokyo
1,Brazil,Sao Paul
2,India,Mumbai


`Using html tables from websites`
---------------------------------------


In [45]:
url = "http://www.espn.com/nba/standings" #lxml

In [46]:
dfs = pd.read_html(url, header = None, skiprows = 0)

In [47]:
len(dfs)

8

In [None]:
dfs[3]

In [None]:
dfs[1]

In [54]:
dfs[1].columns

Index(['1* --MILMilwaukee Bucks'], dtype='object')

In [55]:
type(dfs[1])

pandas.core.frame.DataFrame

In [None]:
dfs[1]

In [57]:
index = pd.DataFrame(np.vstack([dfs[1].columns, dfs[1]])).values

In [58]:
index

array([['1* --MILMilwaukee Bucks'],
       ['2y --TORToronto Raptors'],
       ['3x --PHIPhiladelphia 76ers'],
       ['4x --BOSBoston Celtics'],
       ['5x --INDIndiana Pacers'],
       ['6x --BKNBrooklyn Nets'],
       ['7y --ORLOrlando Magic'],
       ['8x --DETDetroit Pistons'],
       ['e --CHACharlotte Hornets'],
       ['e --MIAMiami Heat'],
       ['e --WSHWashington Wizards'],
       ['e --ATLAtlanta Hawks'],
       ['e --CHIChicago Bulls'],
       ['e --CLECleveland Cavaliers'],
       ['e --NYNew York Knicks']], dtype=object)

In [60]:
dfs[3].head()

Unnamed: 0,W,L,PCT,GB,HOME,AWAY,DIV,CONF,PPG,OPP PPG,DIFF,STRK,L10
0,60,22,0.732,-,33-8,27-14,14-2,40-12,118.1,109.3,8.8,L1,7-3
1,58,24,0.707,2,32-9,26-15,12-4,36-16,114.4,108.4,6.0,W2,7-3
2,51,31,0.622,9,31-10,20-21,8-8,31-21,115.2,112.5,2.7,W1,4-6
3,49,33,0.598,11,28-13,21-20,10-6,35-17,112.4,108.0,4.4,W1,6-4
4,48,34,0.585,12,29-12,19-22,11-5,33-19,108.0,104.7,3.3,W1,4-6


In [61]:
dfs[3].set_index(index)

Unnamed: 0,W,L,PCT,GB,HOME,AWAY,DIV,CONF,PPG,OPP PPG,DIFF,STRK,L10
"(1* --MILMilwaukee Bucks,)",60,22,0.732,-,33-8,27-14,14-2,40-12,118.1,109.3,8.8,L1,7-3
"(2y --TORToronto Raptors,)",58,24,0.707,2,32-9,26-15,12-4,36-16,114.4,108.4,6.0,W2,7-3
"(3x --PHIPhiladelphia 76ers,)",51,31,0.622,9,31-10,20-21,8-8,31-21,115.2,112.5,2.7,W1,4-6
"(4x --BOSBoston Celtics,)",49,33,0.598,11,28-13,21-20,10-6,35-17,112.4,108.0,4.4,W1,6-4
"(5x --INDIndiana Pacers,)",48,34,0.585,12,29-12,19-22,11-5,33-19,108.0,104.7,3.3,W1,4-6
"(6x --BKNBrooklyn Nets,)",42,40,0.512,18,23-18,19-22,8-8,29-23,112.2,112.3,-0.1,W3,6-4
"(7y --ORLOrlando Magic,)",42,40,0.512,18,25-16,17-24,10-6,30-22,107.3,106.6,0.7,W4,8-2
"(8x --DETDetroit Pistons,)",41,41,0.5,19,26-15,15-26,8-8,27-25,107.0,107.3,-0.3,W2,4-6
"(e --CHACharlotte Hornets,)",39,43,0.476,21,25-16,14-27,10-6,29-23,110.7,111.8,-1.1,L1,6-4
"(e --MIAMiami Heat,)",39,43,0.476,21,19-22,20-21,7-9,23-29,105.7,105.9,-0.2,L1,4-6


In [None]:
for df in dfs:
    print(df.head())

In [62]:
dfs[7].head()

Unnamed: 0,W,L,PCT,GB,HOME,AWAY,DIV,CONF,PPG,OPP PPG,DIFF,STRK,L10
0,57,25,0.695,-,30-11,27-14,13-3,35-17,117.7,111.2,6.5,L1,8-2
1,54,28,0.659,3,34-7,20-21,12-4,34-18,110.7,106.7,4.0,W1,5-5
2,53,29,0.646,4,32-9,21-20,6-10,29-23,114.7,110.5,4.2,W3,8-2
3,53,29,0.646,4,31-10,22-19,10-6,32-20,113.9,109.1,4.8,L1,8-2
4,50,32,0.61,7,29-12,21-20,8-8,30-22,111.7,106.5,5.2,L1,8-2
