# Manipulating DataFrame Structure

In [None]:
import numpy as np
import pandas as pd

# load the S&P 500 data, keeping only four of the CSV columns
# (picked by position) and using the Symbol column as the row index
sp500 = pd.read_csv(
    "data/sp500.csv",
    usecols=[0, 2, 3, 7],
    index_col='Symbol',
)

## Renaming columns

In [None]:
# rename() is out-of-place by default: it hands back a new DataFrame
# whose 'Book Value' column is now called 'BookValue' (no space),
# while the columns of sp500 itself are left untouched
newSP500 = sp500.rename(columns={'Book Value': 'BookValue'})
# show the first 2 rows of the renamed copy
newSP500.head(2)

In [None]:
# with inplace=True the rename is applied to sp500 itself
# instead of being returned as a new DataFrame
sp500.rename(columns={'Book Value': 'BookValue'}, inplace=True)
# the column labels of sp500 now contain 'BookValue'
sp500.columns

## Adding new columns

In [None]:
# work on a duplicate so that sp500 itself stays as it is
sp500_copy = sp500.copy()
# assigning to a column label that does not exist yet adds it as a
# new column; here it holds the prices rounded with Series.round()
sp500_copy['RoundedPrice'] = sp500['Price'].round()
sp500_copy.head(2)

In [None]:
# work on a duplicate so that sp500 itself stays as it is
copy = sp500.copy()
# insert() modifies the DataFrame in place: put the rounded prices
# at column position 1, i.e. as the second column
copy.insert(loc=1, column='RoundedPrice', value=sp500['Price'].round())
copy.head(2)

In [None]:
# adding a column by concatenation:
# first build a one-column DataFrame holding the rounded prices
rounded_price = sp500['Price'].round().to_frame(name='RoundedPrice')
# then join it onto sp500 along the columns axis
concatenated = pd.concat([sp500, rounded_price], axis='columns')
concatenated.head(5)

## Reordering columns (out-of-place)

In [None]:
# take the column labels in reverse order ...
reversed_column_names = sp500.columns[::-1]
# ... and select with them: the result is a new DataFrame with the
# columns reordered; sp500 itself is not changed
sp500.loc[:, reversed_column_names].head(5)


## Replacing the contents of a column

In [None]:
# assigning to an existing column changes the DataFrame in place,
# so operate on a copy
copy = sp500.copy()
# overwrite the existing Price column with the rounded values
# rather than adding a further column
copy['Price'] = rounded_price['RoundedPrice']
copy.head(5)

## Deleting columns

In [None]:
# Example of using del to delete a column
# del removes the column in place, so operate on a copy
copy = sp500.copy()
# sp500 had its 'Book Value' column renamed in place to 'BookValue'
# earlier in this notebook, so the old label no longer exists and
# deleting it would raise a KeyError
del copy['BookValue']
copy[:2]

In [None]:
# Example of using pop to remove a column from a DataFrame
# pop() changes the DataFrame it is called on, so work on a copy
copy = sp500.copy()
# take the Sector column out of the copy; pop() hands the removed
# column back as a Series
popped = copy.pop('Sector')
# the copy no longer contains the Sector column
copy.head(2)

In [None]:
# Example of using drop to remove a column
# start from a copy of the DataFrame
copy = sp500.copy()
# drop() is out-of-place: it returns a new DataFrame without
# 'Sector' and leaves the copy DataFrame as it was
afterdrop = copy.drop(columns=['Sector'])
afterdrop.head(5)

## Appending new rows

In [None]:
# copy the first three rows of sp500
df1 = sp500.iloc[0:3].copy()
# select the rows at positions 10, 11 and 2
# (position 2 is also part of df1, so that row will appear twice)
df2 = sp500.iloc[[10, 11, 2]]
# append df2 to df1; DataFrame.append() was deprecated in pandas 1.4
# and removed in pandas 2.0, pd.concat() gives the same result
appended = pd.concat([df1, df2])
# the result is the rows of the first followed by
# those of the second
appended

In [None]:
# ignore index labels, create default index (0, 1, 2, ...)
# pd.concat() is used because DataFrame.append() was removed in pandas 2.0
pd.concat([df1, df2], ignore_index=True)

## Concatenating rows

In [54]:
# take a copy of the first three rows of sp500
df1 = sp500.iloc[:3].copy()
# pick the rows at positions 10, 11 and 2 (position 2 is also in df1)
df2 = sp500.iloc[[10, 11, 2]]
# concat() receives the objects to combine as a list and stacks
# their rows (axis=0) in list order
pd.concat([df1, df2], axis=0)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
A,Health Care,56.18,16.928
GAS,Utilities,52.98,32.462
ABBV,Health Care,53.95,2.954


## Adding and replacing rows via setting with enlargement

In [55]:
# take the first three rows of sp500 and copy them so that the
# assignment below works on an independent DataFrame
ss = sp500.iloc[:3].copy()
# assigning through .loc to a label that is not in the index adds a
# new row labelled FOO; the list supplies one value per column
ss.loc['FOO'] = ['the sector', 100, 110]
ss

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
FOO,the sector,100.0,110.0


## Removing rows using `.drop()`

In [56]:
# take the first 5 rows of sp500 (a slice; no explicit copy is made)
ss = sp500.iloc[:5]
ss

# drop() with index labels is out-of-place: the rows labelled ABT
# and ACN are left out of the returned DataFrame, ss is not modified
afterdrop = ss.drop(index=['ABT', 'ACN'])
afterdrop.head(5)

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABBV,Health Care,53.95,2.954
ACE,Financials,102.91,86.897


## Removing rows using Boolean selection

In [57]:
# Boolean Series that is True for the rows where Price > 300
selection = sp500['Price'] > 300

# ~ inverts the Boolean Series, so this keeps exactly the rows that
# were NOT selected above, i.e. it removes the Price > 300 rows
price_less_than_300 = sp500.loc[~selection]
price_less_than_300

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.60,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897
...,...,...,...
YHOO,Information Technology,35.02,12.768
YUM,Consumer Discretionary,74.77,5.147
ZMH,Health Care,101.84,37.181
ZION,Financials,28.43,30.191
