## Creation

In [1]:
import pandas as pd

In [2]:
stock_dataframe = pd.DataFrame({'High': [2, 2.2, 2.4], 'Low': [1.5, 1.7, 2.1], 'Close': [2, 2.1, 2.2]})
stock_dataframe

Unnamed: 0,High,Low,Close
0,2.0,1.5,2.0
1,2.2,1.7,2.1
2,2.4,2.1,2.2


In [3]:
open_prices = pd.Series([5, 5.6, 5.2], index=['Jan 1', 'Jan 2', 'Jan 3'])
close_prices = pd.Series([5.2, 5.7, 5.4], index=['Jan 2', 'Jan 3', 'Jan 4'])

In [6]:
stock_dataframe = pd.DataFrame({'Open': open_prices, 'Close': close_prices})
stock_dataframe

Unnamed: 0,Open,Close
Jan 1,5.0,
Jan 2,5.6,5.2
Jan 3,5.2,5.7
Jan 4,,5.4


In [9]:
apple_stock_data = pd.read_csv('AAPL.csv', 
                               usecols=['Date', 'Open', 'High', 'Low', 'Close'], 
                               parse_dates=True, 
                               index_col='Date')
apple_stock_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,180.0,182.669998,179.369995,180.910004
2019-03-13,182.25,183.300003,180.919998,181.710007
2019-03-14,183.899994,184.100006,182.559998,183.729996
2019-03-15,184.850006,187.330002,183.740005,186.119995
2019-03-18,185.800003,188.389999,185.789993,188.020004
2019-03-19,188.350006,188.990005,185.919998,186.529999
2019-03-20,186.229996,189.490005,184.729996,188.160004
2019-03-21,190.020004,196.330002,189.809998,195.089996
2019-03-22,195.339996,197.690002,190.779999,191.050003
2019-03-25,191.509995,191.979996,186.600006,188.740005


## Functions
### Fetch

In [13]:
apple_stock_data['Close']   # returns a Series

Date
2019-03-12    180.910004
2019-03-13    181.710007
2019-03-14    183.729996
2019-03-15    186.119995
2019-03-18    188.020004
2019-03-19    186.529999
2019-03-20    188.160004
2019-03-21    195.089996
2019-03-22    191.050003
2019-03-25    188.740005
2019-03-26    186.789993
2019-03-27    188.470001
2019-03-28    188.720001
2019-03-29    189.949997
2019-04-01    191.240005
2019-04-02    194.020004
2019-04-03    195.350006
2019-04-04    195.690002
2019-04-05    197.000000
2019-04-08    200.100006
2019-04-09    199.500000
2019-04-10    200.619995
Name: Close, dtype: float64

In [14]:
apple_stock_data.loc['2019-03-26']   # type = pd.Series

Open     191.660004
High     192.880005
Low      184.580002
Close    186.789993
Name: 2019-03-26 00:00:00, dtype: float64

In [15]:
apple_stock_data.iloc[0]

Open     180.000000
High     182.669998
Low      179.369995
Close    180.910004
Name: 2019-03-12 00:00:00, dtype: float64

In [16]:
apple_stock_data.iloc[0:5]   # type = pd.Dataframe

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,180.0,182.669998,179.369995,180.910004
2019-03-13,182.25,183.300003,180.919998,181.710007
2019-03-14,183.899994,184.100006,182.559998,183.729996
2019-03-15,184.850006,187.330002,183.740005,186.119995
2019-03-18,185.800003,188.389999,185.789993,188.020004


Where possible pandas will convert them to Series

In [17]:
apple_stock_data['Open']['2019-03-12']

180.0

In [18]:
apple_stock_data.loc['2019-03-12']['Open']

180.0

In [19]:
first_five = apple_stock_data.iloc[0:5]
first_five.describe()

Unnamed: 0,Open,High,Low,Close
count,5.0,5.0,5.0,5.0
mean,183.360001,185.158002,182.475998,184.098001
std,2.29058,2.545833,2.482726,2.977912
min,180.0,182.669998,179.369995,180.910004
25%,182.25,183.300003,180.919998,181.710007
50%,183.899994,184.100006,182.559998,183.729996
75%,184.850006,187.330002,183.740005,186.119995
max,185.800003,188.389999,185.789993,188.020004


In [20]:
first_five.max() # returns for each column

Open     185.800003
High     188.389999
Low      185.789993
Close    188.020004
dtype: float64

In [21]:
first_five.max(axis=0) # axis default value is 0

Open     185.800003
High     188.389999
Low      185.789993
Close    188.020004
dtype: float64

In [22]:
first_five.max(axis=1)

Date
2019-03-12    182.669998
2019-03-13    183.300003
2019-03-14    184.100006
2019-03-15    187.330002
2019-03-18    188.389999
dtype: float64

In [23]:
first_five.idxmax(axis=0)

Open    2019-03-18
High    2019-03-18
Low     2019-03-18
Close   2019-03-18
dtype: datetime64[ns]

In [24]:
first_five.idxmax(axis=1)

Date
2019-03-12    High
2019-03-13    High
2019-03-14    High
2019-03-15    High
2019-03-18    High
dtype: object

### Modify

In [25]:
first_five

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,180.0,182.669998,179.369995,180.910004
2019-03-13,182.25,183.300003,180.919998,181.710007
2019-03-14,183.899994,184.100006,182.559998,183.729996
2019-03-15,184.850006,187.330002,183.740005,186.119995
2019-03-18,185.800003,188.389999,185.789993,188.020004


In [28]:
first_five.iloc[0] = [1, 2, 3, 4] # modifies original
first_five

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,1.0,2.0,3.0,4.0
2019-03-13,182.25,183.300003,180.919998,181.710007
2019-03-14,183.899994,184.100006,182.559998,183.729996
2019-03-15,184.850006,187.330002,183.740005,186.119995
2019-03-18,185.800003,188.389999,185.789993,188.020004


In [30]:
first_five.iloc[0] = 1
first_five

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,1.0,1.0,1.0,1.0
2019-03-13,182.25,183.300003,180.919998,181.710007
2019-03-14,183.899994,184.100006,182.559998,183.729996
2019-03-15,184.850006,187.330002,183.740005,186.119995
2019-03-18,185.800003,188.389999,185.789993,188.020004


you can either set it to one value or set all the values but not something inbetween

In [31]:
first_five['Open'] = 2
first_five

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,2,1.0,1.0,1.0
2019-03-13,2,183.300003,180.919998,181.710007
2019-03-14,2,184.100006,182.559998,183.729996
2019-03-15,2,187.330002,183.740005,186.119995
2019-03-18,2,188.389999,185.789993,188.020004


In [32]:
first_five['Open'] = [1, 2, 3, 4, 5]
first_five

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,1,1.0,1.0,1.0
2019-03-13,2,183.300003,180.919998,181.710007
2019-03-14,3,184.100006,182.559998,183.729996
2019-03-15,4,187.330002,183.740005,186.119995
2019-03-18,5,188.389999,185.789993,188.020004


In [33]:
first_five.loc['2019-03-13', 'Low'] = 5
first_five

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,1,1.0,1.0,1.0
2019-03-13,2,183.300003,5.0,181.710007
2019-03-14,3,184.100006,182.559998,183.729996
2019-03-15,4,187.330002,183.740005,186.119995
2019-03-18,5,188.389999,185.789993,188.020004


In [35]:
first_five.transpose() # returns a new dataframe

Date,2019-03-12,2019-03-13,2019-03-14,2019-03-15,2019-03-18
Open,1.0,2.0,3.0,4.0,5.0
High,1.0,183.300003,184.100006,187.330002,188.389999
Low,1.0,5.0,182.559998,183.740005,185.789993
Close,1.0,181.710007,183.729996,186.119995,188.020004


In [37]:
first_five * 2 # returns a new dataframe

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-12,2,2.0,2.0,2.0
2019-03-13,4,366.600006,10.0,363.420014
2019-03-14,6,368.200012,365.119996,367.459992
2019-03-15,8,374.660004,367.48001,372.23999
2019-03-18,10,376.779998,371.579986,376.040008
