# DataFrame - create, view, modify and sort

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [3]:
import webbrowser as wb
site='https://en.wikipedia.org/wiki/NFL_win-loss_records'
wb.open(site)

True

### Creating a DF by pasting data from the clipboard

In [14]:
nfl= pd.read_clipboard()
nfl

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC Eas


## Creating a new DF from a DF

In [15]:
new_nfl=DataFrame(nfl,columns=['Team','Total Games','Stadium']) # creating a new dataframe from a df
new_nfl

Unnamed: 0,Team,Total Games,Stadium
0,Dallas Cowboys,866,
1,Green Bay Packers,1320,
2,Chicago Bears,1354,
3,Miami Dolphins,784,
4,New England Patriots,868,


## Creating a DF by converting a Dicitonary object

In [16]:
data = {'City':['SF','LA','NYC'],'Population':[864816,3976000,8538000]}
data

{'City': ['SF', 'LA', 'NYC'], 'Population': [864816, 3976000, 8538000]}

In [17]:
citydf = DataFrame(data) # Passing a dictionary to create a DF
citydf

Unnamed: 0,City,Population
0,SF,864816
1,LA,3976000
2,NYC,8538000


## Creating a DF by passing a ndarray, DatetimeIndex and Labeled columns

In [18]:
dates = pd.date_range('20130101', periods=6)
df= DataFrame(np.random.randn(6,4),
             index=dates,
             columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.996086,0.319651,-0.744387,-0.303611
2013-01-02,0.994615,0.91878,-0.117987,-0.490556
2013-01-03,-1.187591,-1.010861,1.143807,0.390009
2013-01-04,-0.532076,0.583875,-0.876323,0.738796
2013-01-05,2.078625,1.233413,-1.970443,0.169443
2013-01-06,1.085782,-0.671153,1.024663,1.444658


## Viewing data

In [19]:
nfl.tail(2) # the bottom two rows of the df. By default it returns 5 rows

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC Eas


In [20]:
nfl.head(2) # the top 2 rows. By default it returns 5 rows

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North


In [21]:
nfl.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Total Games', 'Division'],
      dtype='object')

In [22]:
nfl.index

RangeIndex(start=0, stop=5, step=1)

In [23]:
nfl.values

array([[1, 'Dallas Cowboys', 493, 367, 6, 0.573, 1960, '866', 'NFC East'],
       [2, 'Green Bay Packers', 730, 553, 37, 0.5670000000000001, 1921,
        '1,320', 'NFC North'],
       [3, 'Chicago Bears', 744, 568, 42, 0.565, 1920, '1,354', 'NFC North'],
       [4, 'Miami Dolphins', 439, 341, 4, 0.563, 1966, '784', 'AFC East'],
       [5, 'New England Patriots', 476, 383, 9, 0.5539999999999999, 1960,
        '868', 'AFC Eas']], dtype=object)

In [24]:
nfl.describe()

Unnamed: 0,Rank,Won,Lost,Tied,Pct.,First NFL Season
count,5.0,5.0,5.0,5.0,5.0,5.0
mean,3.0,576.4,442.4,19.6,0.5644,1945.4
std,1.581139,147.984121,108.976144,18.338484,0.006914,22.86482
min,1.0,439.0,341.0,4.0,0.554,1920.0
25%,2.0,476.0,367.0,6.0,0.563,1921.0
50%,3.0,493.0,383.0,9.0,0.565,1960.0
75%,4.0,730.0,553.0,37.0,0.567,1960.0
max,5.0,744.0,568.0,42.0,0.573,1966.0


In [25]:
nfl.T

Unnamed: 0,0,1,2,3,4
Rank,1,2,3,4,5
Team,Dallas Cowboys,Green Bay Packers,Chicago Bears,Miami Dolphins,New England Patriots
Won,493,730,744,439,476
Lost,367,553,568,341,383
Tied,6,37,42,4,9
Pct.,0.573,0.567,0.565,0.563,0.554
First NFL Season,1960,1921,1920,1966,1960
Total Games,866,1320,1354,784,868
Division,NFC East,NFC North,NFC North,AFC East,AFC Eas


# Adding & Modifying columns 

#### Adding one value to all rows of a column 

In [26]:
nfl['Stadium']="Levi's Stadium"
nfl

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,Levi's Stadium
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,Levi's Stadium
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,Levi's Stadium
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,Levi's Stadium
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC Eas,Levi's Stadium


#### Adding a ndarray to rows of a column

In [27]:
nfl['Stadium']=np.arange(1,6) 
nfl

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,1
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,2
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,3
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,4
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC Eas,5


#### Adding Series to a row of columns in a DF

In [28]:
stadiums=Series(["Levi's Stadium", "AT&T Stadium"], index=[4,0])
stadiums

4    Levi's Stadium
0      AT&T Stadium
dtype: object

In [29]:
nfl['Stadium']=stadiums 
nfl

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,AT&T Stadium
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC Eas,Levi's Stadium


#### Deleting a column in a DF

In [30]:
del nfl['Stadium']
nfl

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC Eas


# Sorting

In [31]:
nfl.sort_index(axis=1, ascending=False) # sorting by axis

Unnamed: 0,Won,Total Games,Tied,Team,Rank,Pct.,Lost,First NFL Season,Division
0,493,866,6,Dallas Cowboys,1,0.573,367,1960,NFC East
1,730,1320,37,Green Bay Packers,2,0.567,553,1921,NFC North
2,744,1354,42,Chicago Bears,3,0.565,568,1920,NFC North
3,439,784,4,Miami Dolphins,4,0.563,341,1966,AFC East
4,476,868,9,New England Patriots,5,0.554,383,1960,AFC Eas


In [33]:
nfl.sort_values(by='Won',ascending=False) # sorting by values in a row in a columnn

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC Eas
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East


In [45]:
ser1= Series(np.random.randn(10))
ser1

0   -0.556892
1    0.542276
2    2.067512
3    1.409528
4    0.926555
5    0.165241
6    1.803255
7   -0.404141
8    1.783934
9    1.104506
dtype: float64

In [49]:
ser1.sort_values()

0   -0.556892
7   -0.404141
5    0.165241
1    0.542276
4    0.926555
9    1.104506
3    1.409528
8    1.783934
6    1.803255
2    2.067512
dtype: float64

In [51]:
ser1.rank() # Rank of the index-values in respect to the data-values

0     1.0
1     4.0
2    10.0
3     7.0
4     5.0
5     3.0
6     9.0
7     2.0
8     8.0
9     6.0
dtype: float64