# Import Data

In [2]:
import numpy as np
import pandas as pd

## 01 Series (one column)

In [4]:
# Build a one-column Series of five uniform random floats in [0, 1).
# A seeded Generator is used so the values are identical on every
# Restart & Run All instead of changing with each execution.
ser = pd.Series(np.random.default_rng(seed=42).random(5), name="Column 01")
ser

0    0.810855
1    0.314075
2    0.625167
3    0.543191
4    0.582925
Name: Column 01, dtype: float64

In [5]:
# Look up the element whose index label is 1 (the second row of the default 0..4 index).
ser.loc[1]

0.31407540231726494

## 02 DataFrame (several columns)

### Part I: Data of a company

In [8]:
# Download the price history for ticker 'PG' from Yahoo Finance via yfinance.
import yfinance as yf

PG = yf.download(tickers='PG', start='1995-01-01', end='2023-12-31')

[*********************100%***********************]  1 of 1 completed


In [9]:
# Summarise the frame: index range, columns, non-null counts, dtypes and memory usage.
PG.info()

# Preview the first and last five rows.
# Pass a larger row count (e.g. PG.head(20) / PG.tail(20)) to see more.
print(PG.head(n=5))
print(PG.tail(n=5))

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7300 entries, 1995-01-03 to 2023-12-29
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   (Close, PG)   7300 non-null   float64
 1   (High, PG)    7300 non-null   float64
 2   (Low, PG)     7300 non-null   float64
 3   (Open, PG)    7300 non-null   float64
 4   (Volume, PG)  7300 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 342.2 KB
Price          Close      High       Low      Open   Volume
Ticker            PG        PG        PG        PG       PG
Date                                                       
1995-01-03  7.441166  7.456079  7.366606  7.381518  3318400
1995-01-04  7.381520  7.470993  7.306959  7.411345  2218800
1995-01-05  7.277133  7.366605  7.262220  7.336781  2319600
1995-01-06  7.292041  7.351690  7.232393  7.232393  3438000
1995-01-09  7.262219  7.351692  7.247307  7.321868  1795200
Price            Close        High         

### Part II: Data of several companies

In [11]:
import yfinance as yf
import pandas as pd

In [12]:
# Ticker symbols of the stocks to download, in the column order wanted later.
tickers = [
    'AAPL',
    'MSFT',
    'TSLA',
    'GE',
]

In [13]:
# Start from an empty DataFrame that will hold one closing-price column per ticker.
new_data = pd.DataFrame(data=None)

In [14]:
# Collect the closing-price series of every ticker, then combine them once.
close_by_ticker = {}
for ticker in tickers:
    stock_data = yf.download(ticker, start='1995-01-01', end='2023-12-31')
    close = stock_data['Close']
    # yf.download can return MultiIndex columns (Price, Ticker), in which case
    # stock_data['Close'] is a one-column DataFrame rather than a Series.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    close_by_ticker[ticker] = close

# An outer join keeps every trading date seen for any ticker. Assigning column
# by column into an empty frame would instead align everything to the first
# ticker's dates and silently drop the rest. Dates where a ticker has no data
# are filled with NaN.
if close_by_ticker:
    new_data = pd.concat(close_by_ticker, axis=1).sort_index()
else:
    # pd.concat raises on an empty mapping; keep the empty-frame result instead.
    new_data = pd.DataFrame()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [15]:
print(new_data.head())

                AAPL      MSFT  TSLA         GE
Date                                           
1995-01-03  0.285766  2.319453   NaN  20.200663
1995-01-04  0.293213  2.336313   NaN  20.200663
1995-01-05  0.289489  2.297776   NaN  20.250179
1995-01-06  0.312760  2.336313   NaN  20.151142
1995-01-09  0.306826  2.321862   NaN  19.953098


In [16]:
print(new_data.tail())

                  AAPL        MSFT        TSLA          GE
Date                                                      
2023-12-22  192.656174  372.543915  252.539993  100.383560
2023-12-26  192.108856  372.623505  256.609985  101.001595
2023-12-27  192.208374  372.036713  261.440002  101.643799
2023-12-28  192.636276  373.240112  253.179993  101.738937
2023-12-29  191.591385  373.995972  248.479996  101.191872


## 03 Pandas' methods

### 3.1 csv
#### Save to csv: .to_csv

In [19]:
# Write the closing prices to a CSV file; the destination file name should end with '.csv'.
# NOTE(review): this absolute path is machine-specific — adjust it before running elsewhere.
new_data.to_csv(path_or_buf="/Users/jiangyanze/Desktop/new_data.csv")

#### Read csv: .read_csv (DataFrame Type)

In [21]:
# Load the saved CSV back into a DataFrame. No index_col is given, so 'Date'
# comes back as an ordinary column next to a fresh 0..n-1 integer index.
read_new_data_cvx = pd.read_csv(
    filepath_or_buffer="/Users/jiangyanze/Desktop/new_data.csv"
)

In [22]:
# Preview the first five rows of the frame read from CSV.
read_new_data_cvx.head(n=5)

Unnamed: 0,Date,AAPL,MSFT,TSLA,GE
0,1995-01-03,0.285766,2.319453,,20.200663
1,1995-01-04,0.293213,2.336313,,20.200663
2,1995-01-05,0.289489,2.297776,,20.250179
3,1995-01-06,0.31276,2.336313,,20.151142
4,1995-01-09,0.306826,2.321862,,19.953098


In [23]:
# Preview the last five rows of the frame read from CSV.
read_new_data_cvx.tail(n=5)

Unnamed: 0,Date,AAPL,MSFT,TSLA,GE
7295,2023-12-22,192.656174,372.543915,252.539993,100.38356
7296,2023-12-26,192.108856,372.623505,256.609985,101.001595
7297,2023-12-27,192.208374,372.036713,261.440002,101.643799
7298,2023-12-28,192.636276,373.240112,253.179993,101.738937
7299,2023-12-29,191.591385,373.995972,248.479996,101.191872


### 3.2 excel
#### Save to excel: .to_excel

In [25]:
# Write the closing prices to an Excel workbook; the destination file name should end with '.xlsx'.
# NOTE(review): this absolute path is machine-specific — adjust it before running elsewhere.
new_data.to_excel(excel_writer="/Users/jiangyanze/Desktop/new_data.xlsx")

#### Read excel: .read_excel (DataFrame Type)

In [27]:
# Load the saved workbook back into a DataFrame. No index_col is given, so
# 'Date' comes back as an ordinary column next to a fresh integer index.
read_new_data_excel = pd.read_excel(
    io="/Users/jiangyanze/Desktop/new_data.xlsx"
)

In [28]:
# Preview the first five rows of the frame read from Excel.
read_new_data_excel.head(n=5)

Unnamed: 0,Date,AAPL,MSFT,TSLA,GE
0,1995-01-03,0.285766,2.319453,,20.200663
1,1995-01-04,0.293213,2.336313,,20.200663
2,1995-01-05,0.289489,2.297776,,20.250179
3,1995-01-06,0.31276,2.336313,,20.151142
4,1995-01-09,0.306826,2.321862,,19.953098


In [29]:
# Preview the last five rows of the frame read from Excel.
read_new_data_excel.tail(n=5)

Unnamed: 0,Date,AAPL,MSFT,TSLA,GE
7295,2023-12-22,192.656174,372.543915,252.539993,100.38356
7296,2023-12-26,192.108856,372.623505,256.609985,101.001595
7297,2023-12-27,192.208374,372.036713,261.440002,101.643799
7298,2023-12-28,192.636276,373.240112,253.179993,101.738937
7299,2023-12-29,191.591385,373.995972,248.479996,101.191872


### 3.3 Change index

#### parameter: index_col='---'

In [32]:
# Option 1: choose the index while reading — index_col="Date" makes the
# 'Date' column the row index of the resulting DataFrame.
read_new_data_excel_3_3_1 = pd.read_excel(
    io="/Users/jiangyanze/Desktop/new_data.xlsx",
    index_col="Date",
)

In [33]:
# Preview the first five rows; 'Date' is now the index.
read_new_data_excel_3_3_1.head(n=5)

Unnamed: 0_level_0,AAPL,MSFT,TSLA,GE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1995-01-03,0.285766,2.319453,,20.200663
1995-01-04,0.293213,2.336313,,20.200663
1995-01-05,0.289489,2.297776,,20.250179
1995-01-06,0.31276,2.336313,,20.151142
1995-01-09,0.306826,2.321862,,19.953098


#### function: set_index('---')

In [35]:
# Option 2: read with the default integer index, then call set_index.
# The re-indexed frame is only displayed here, not assigned to a variable.
read_new_data_excel_3_3_2 = pd.read_excel(
    io="/Users/jiangyanze/Desktop/new_data.xlsx"
)
read_new_data_excel_3_3_2.set_index(keys="Date")

Unnamed: 0_level_0,AAPL,MSFT,TSLA,GE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1995-01-03,0.285766,2.319453,,20.200663
1995-01-04,0.293213,2.336313,,20.200663
1995-01-05,0.289489,2.297776,,20.250179
1995-01-06,0.312760,2.336313,,20.151142
1995-01-09,0.306826,2.321862,,19.953098
...,...,...,...,...
2023-12-22,192.656174,372.543915,252.539993,100.383560
2023-12-26,192.108856,372.623505,256.609985,101.001595
2023-12-27,192.208374,372.036713,261.440002,101.643799
2023-12-28,192.636276,373.240112,253.179993,101.738937


In [36]:
# set_index returns a new DataFrame: because that result was not assigned,
# the original frame is unchanged and 'Date' is still an ordinary column.
read_new_data_excel_3_3_2.head(n=5)

Unnamed: 0,Date,AAPL,MSFT,TSLA,GE
0,1995-01-03,0.285766,2.319453,,20.200663
1,1995-01-04,0.293213,2.336313,,20.200663
2,1995-01-05,0.289489,2.297776,,20.250179
3,1995-01-06,0.31276,2.336313,,20.151142
4,1995-01-09,0.306826,2.321862,,19.953098


In [37]:
# Assign the result of set_index to a new variable to keep the re-indexed frame.
read_new_data_excel_3_3_3 = read_new_data_excel_3_3_2.set_index(keys="Date")

In [38]:
# Preview the first five rows; 'Date' is the index of the assigned result.
read_new_data_excel_3_3_3.head(n=5)

Unnamed: 0_level_0,AAPL,MSFT,TSLA,GE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1995-01-03,0.285766,2.319453,,20.200663
1995-01-04,0.293213,2.336313,,20.200663
1995-01-05,0.289489,2.297776,,20.250179
1995-01-06,0.31276,2.336313,,20.151142
1995-01-09,0.306826,2.321862,,19.953098
