### 確認安裝成功

In [2]:
import pandas as pd

# 檢查 Pandas 版本
print("Pandas version:", pd.__version__)

# 建立簡單的 DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 30, 35]}
df = pd.DataFrame(data)

# 顯示 DataFrame
print("\nSample DataFrame:\n", df)

Pandas version: 2.2.2

Sample DataFrame:
       Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


### series

In [3]:
s = pd.Series([1, 3, 5, 7, 9])
print(s)

0    1
1    3
2    5
3    7
4    9
dtype: int64


### dataframe

In [4]:
# 從字典創建 DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David'],
        'Age': [25, 30, 35, 40],
        'City': ['New York', 'Los Angeles', 'Chicago', 'Houston']}
df = pd.DataFrame(data)
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston


In [5]:
# 取得 Series 中的元素
print(s[2])

print('\n')
# 選擇 DataFrame 中的某一列
print(df['Name'])

5


0      Alice
1        Bob
2    Charlie
3      David
Name: Name, dtype: object


### read_csv

In [6]:
df_csv=pd.read_csv('data.csv')

In [7]:
# 顯示 DataFrame 的前幾行
print(df_csv.head())

# 檢查列名
print(df_csv.columns)

# 檢查資料型態
print(df_csv.dtypes)

# 獲取基本統計資訊
print(df_csv.describe())

      Name  Age  Gender           City
0     John   25    Male       New York
1    Emily   30  Female  San Francisco
2  Michael   35    Male        Chicago
3   Sophia   28  Female    Los Angeles
4   Daniel   40    Male          Miami
Index(['Name', 'Age', 'Gender', 'City'], dtype='object')
Name      object
Age        int64
Gender    object
City      object
dtype: object
            Age
count  10.00000
mean   30.40000
std     8.82169
min    18.00000
25%    25.50000
50%    29.00000
75%    36.50000
max    45.00000


### 選擇行和列

In [8]:
# 創建一個範例 DataFrame
data = {'A': [1, 2, 3],
        'B': [4, 5, 6], 
        'C': [7, 8, 9]}
df=pd.DataFrame(data)
print("原資料：")
print(df)
# 選擇列 "A" 和 "B"
selected_columns=df[['A','B']]
print("\n選擇資料：")
print(selected_columns)

原資料：
   A  B  C
0  1  4  7
1  2  5  8
2  3  6  9

選擇資料：
   A  B
0  1  4
1  2  5
2  3  6


### 條件篩選

In [9]:
filtered_rows=df[df['A']>1]
print(filtered_rows)

   A  B  C
1  2  5  8
2  3  6  9


### 索引方法

In [10]:
selected_data_loc=df.loc[1:2,['A','C']]
selected_data_loc

Unnamed: 0,A,C
1,2,8
2,3,9


In [11]:
selected_data_iloc=df.iloc[1:3,[0,2]]
selected_data_iloc

Unnamed: 0,A,C
1,2,8
2,3,9


### 移除重複值

In [14]:
data = {'Name': ['John', 'Jane', 'John', 'Alice', 'Bob', 'Bob'],
        'Age': [25, 30, 25, 35, 40, 40]}
df=pd.DataFrame(data)
df_cleaned=df.drop_duplicates()
print('移除前：')
print(df)
print('\n移除後：')
print(df_cleaned)

移除前：
    Name  Age
0   John   25
1   Jane   30
2   John   25
3  Alice   35
4    Bob   40
5    Bob   40

移除後：
    Name  Age
0   John   25
1   Jane   30
3  Alice   35
4    Bob   40


### 處理遺失值

In [17]:
import numpy as np
# 帶有遺失值的範例 DataFrame
data = {'Name': ['John', 'Jane', np.nan, 'Alice', 'Bob'],
        'Age': [25, np.nan, 30, 35, np.nan]}
df = pd.DataFrame(data)
# 檢查遺失值
print('檢查是否遺失：')
print(df.isnull())
# 使用特定值填充遺失值
df_filled=df.fillna(18)
print('\n填補前：')
print(df)
print('\n填補後：')
print(df_filled)

檢查是否遺失：
    Name    Age
0  False  False
1  False   True
2   True  False
3  False  False
4  False   True

填補前：
    Name   Age
0   John  25.0
1   Jane   NaN
2    NaN  30.0
3  Alice  35.0
4    Bob   NaN

填補後：
    Name   Age
0   John  25.0
1   Jane  18.0
2     18  30.0
3  Alice  35.0
4    Bob  18.0


### 數據類型轉換

In [18]:
# 帶有混合資料類型的範例 DataFrame
data = {'Name': ['John', 'Jane', 'Alice', 'Bob'],
        'Age': ['25', '30', '35', '40']}
df = pd.DataFrame(data)
print('轉換前：')
print(df.dtypes)
# 將 Age 從字串轉換為整數
df['Age']=df['Age'].astype(int)
print('\n轉換後：')
print(df.dtypes)

轉換前：
Name    object
Age     object
dtype: object

轉換後：
Name    object
Age      int32
dtype: object


### 分組聚合

In [20]:
data = {
    'Date': ['2022-01-01', '2022-01-01', '2022-01-02', '2022-01-02'],
    'Product': ['A', 'B', 'A', 'B'],
    'Quantity Sold': [100, 150, 120, 200],
    'Revenue': [5000, 7500, 6000, 10000]
}
df = pd.DataFrame(data)
print(df)
# 按產品對資料進行分組，然後計算每個產品組的總銷售數量和收入
grouped_data=df.groupby('Product').agg({'Quantity Sold':'sum','Revenue':'sum'})
print('\n分組資料：')
print(grouped_data)

         Date Product  Quantity Sold  Revenue
0  2022-01-01       A            100     5000
1  2022-01-01       B            150     7500
2  2022-01-02       A            120     6000
3  2022-01-02       B            200    10000

分組資料：
         Quantity Sold  Revenue
Product                        
A                  220    11000
B                  350    17500


### 應用函式

In [28]:
# 計算利潤率的函式
def calculate_profit_margin(row):
    return (row['Revenue']-row['Cost'])/row['Revenue']*100
# 將 Cost 欄位加入資料集
df['Cost'] = [4000, 6000, 4500, 8000]
# 計算利潤率
df['Profit Margin (%)'] = df.apply(calculate_profit_margin,axis=1)
df

Unnamed: 0,Date,Product,Units Sold,Revenue,Profit Margin (%),Profit,Cost
0,2022-01-01,A,100,5000,20.0,1000,4000
1,2022-01-01,B,150,7500,20.0,1500,6000
2,2022-01-02,A,120,6000,25.0,1500,4500
3,2022-01-02,B,200,10000,20.0,2000,8000


### 新增、刪除和重新命名欄位

In [29]:
# 加入 Profit 欄位並計算利潤
df=df.assign(Profit=df['Revenue']-df['Cost'])
# 刪除 Cost 欄位
df=df.drop('Cost',axis=1)
# 重新命名欄位
df=df.rename(columns={'Quantity Sold':'Units Sold'})
df


Unnamed: 0,Date,Product,Units Sold,Revenue,Profit Margin (%),Profit
0,2022-01-01,A,100,5000,20.0,1000
1,2022-01-01,B,150,7500,20.0,1500
2,2022-01-02,A,120,6000,25.0,1500
3,2022-01-02,B,200,10000,20.0,2000


### 合併不同資料集

In [30]:
# 範例產品資料
product_data = {
    'Product': ['A', 'B'],
    'Product Name': ['Apple', 'Banana'],
    'Location': ['New York', 'Los Angeles']
}
product_df = pd.DataFrame(product_data)
# 根據 Product 欄位合併兩個資料集
merged_df = pd.merge(df, product_df, on='Product', how='left')
merged_df

Unnamed: 0,Date,Product,Units Sold,Revenue,Profit Margin (%),Profit,Product Name,Location
0,2022-01-01,A,100,5000,20.0,1000,Apple,New York
1,2022-01-01,B,150,7500,20.0,1500,Banana,Los Angeles
2,2022-01-02,A,120,6000,25.0,1500,Apple,New York
3,2022-01-02,B,200,10000,20.0,2000,Banana,Los Angeles


### 資料輸出

In [31]:
# 範例 DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35],
        'City': ['New York', 'Los Angeles', 'Chicago']}
df = pd.DataFrame(data)
# 儲存為 CSV
df.to_csv('sample_data.csv')