# data frames and Series have some methods in common

In [2]:
import pandas as pd

In [2]:
nba = pd.read_csv("nba.csv")

In [3]:
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [4]:
nba.index

RangeIndex(start=0, stop=458, step=1)

In [5]:
nba.count()

Name        457
Team        457
Number      457
Position    457
Age         457
Height      457
Weight      457
College     373
Salary      446
dtype: int64

In [6]:
nba.shape

(458, 9)

In [7]:
nba.columns

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary'],
      dtype='object')

In [15]:
nba.axes

[RangeIndex(start=0, stop=458, step=1),
 Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
        'College', 'Salary'],
       dtype='object')]

In [17]:
nba.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null float64
Position    457 non-null object
Age         457 non-null float64
Height      457 non-null object
Weight      457 non-null float64
College     373 non-null object
Salary      446 non-null float64
dtypes: float64(4), object(5)
memory usage: 32.3+ KB


In [19]:
# Count how many columns there are of each dtype.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in 1.0;
# counting the values of .dtypes is the supported replacement. Converting the
# dtypes to their string names and sorting keeps the result ordered by dtype name.
nba.dtypes.astype(str).value_counts().sort_index()

float64    4
object     5
dtype: int64

In [21]:
# Load the revenue data, using the "Date" column as the row index,
# then display the resulting frame.
rev = pd.read_csv("revenue.csv", index_col="Date")
rev

Unnamed: 0_level_0,New York,Los Angeles,Miami
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/16,985,122,499
1/2/16,738,788,534
1/3/16,14,20,933
1/4/16,730,904,885
1/5/16,114,71,253
1/6/16,936,502,497
1/7/16,123,996,115
1/8/16,935,492,886
1/9/16,846,954,823
1/10/16,54,285,216


# axis=0 and axis="index" are the same thing

In [23]:
rev.sum(axis=0)

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [24]:
rev.sum(axis="index")

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

# axis=1 and axis="columns" are the same thing.
# It adds horizontally across each row, e.g. 985+122+499 = 1606

In [25]:
rev.sum(axis=1)

Date
1/1/16     1606
1/2/16     2060
1/3/16      967
1/4/16     2519
1/5/16      438
1/6/16     1935
1/7/16     1234
1/8/16     2313
1/9/16     2623
1/10/16     555
dtype: int64

# Lec 47 select one column from dataframe

In [27]:
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


# method 1 -> using column name as property of dataframe object

In [29]:
nba.Team.head()

0    Boston Celtics
1    Boston Celtics
2    Boston Celtics
3    Boston Celtics
4    Boston Celtics
Name: Team, dtype: object

# method 2

In [30]:
nba["Team"].head()

0    Boston Celtics
1    Boston Celtics
2    Boston Celtics
3    Boston Celtics
4    Boston Celtics
Name: Team, dtype: object

In [31]:
# Selecting a label that is not a column raises KeyError.
# The error is the point of this cell, so catch it and show it instead of
# letting it abort a Restart Kernel -> Run All.
try:
    nba["TeamTeam"].head()
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'TeamTeam'

# when a single column is extracted from a dataframe, the result
# is a Series object

In [35]:
type(nba["Team"])

pandas.core.series.Series

# Lec 48 Select 2 or more columns

In [37]:
nba[["Name", "Team"]].head()

Unnamed: 0,Name,Team
0,Avery Bradley,Boston Celtics
1,Jae Crowder,Boston Celtics
2,John Holland,Boston Celtics
3,R.J. Hunter,Boston Celtics
4,Jonas Jerebko,Boston Celtics


# Lec 49 Add a new column to dataframe

In [39]:
# Assigning a scalar to a new column label creates the column and
# fills every row with that value.
nba["Rating"] = "Good"

In [40]:
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Rating
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Good
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Good
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Good
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,Good
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Good


# inserting columns to a specific index

In [3]:
nba = pd.read_csv("nba.csv")

In [42]:
# insert() modifies nba in place; position 3 places the new "Sport" column
# directly after "Number". The scalar value is repeated for every row.
nba.insert(3, column="Sport", value="BasketBall")

In [44]:
nba.tail()

Unnamed: 0,Name,Team,Number,Sport,Position,Age,Height,Weight,College,Salary
453,Shelvin Mack,Utah Jazz,8.0,BasketBall,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,BasketBall,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,BasketBall,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,BasketBall,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,BasketBall,,,,,,


# Lec 50 Broadcasting Operations
# it is similar to the apply() method
### the apply method applies an operation to every single value in the Series


In [4]:
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [6]:
nba["Number"].add(5).head()

0      5.0
1    104.0
2     35.0
3     33.0
4     13.0
Name: Number, dtype: float64

In [11]:
nba["Number"].head()+5

0      5.0
1    104.0
2     35.0
3     33.0
4     13.0
Name: Number, dtype: float64

## other mathematical methods are sub(), div(), mul() 

In [19]:
nba = pd.read_csv("nba.csv")

In [21]:
# Broadcast a multiplication by 10 over every value in the "Number" column
# and store the result back in the same column.
nba["Number"] = nba["Number"].mul(10)

In [23]:
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,990.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,300.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,280.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,80.0,PF,29.0,6-10,231.0,,5000000.0


# Lec 51 value_counts()

In [28]:
nba["Team"].value_counts().tail()

Detroit Pistons           15
Chicago Bulls             15
Dallas Mavericks          15
Minnesota Timberwolves    14
Orlando Magic             14
Name: Team, dtype: int64