# CSV & EXCEL

In [None]:
import pandas as pd
# Load the microbiome CSV as UTF-8 and preview its first rows.
# header='infer' and sep=',' are pandas' defaults, spelled out explicitly here.
pd.read_csv("./data/microbiome.csv", header='infer', sep=',', encoding="utf-8").head()

In [None]:
# Read two sheets of the same workbook; header=None keeps the first row as data
# instead of using it for column names.
workbook_path = './data/Sample_xls.xlsx'
dx1 = pd.read_excel(workbook_path, sheet_name='sheet1', header=None)
dx2 = pd.read_excel(workbook_path, sheet_name='sheet2', header=None)

# Show the head of each sheet, separated by a 100-dash rule.
print(dx1.head(), '-' * 100, dx2.head(), sep='\n')

In [None]:
# Per-state figures keyed by state name; each dict becomes a Series indexed by state.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(population_dict)

area_dict = {
    'California': 423967,
    'Texas': 695662,
    'Illinois': 149995,
    'New York': 141297,
    'Florida': 170312,
}
area = pd.Series(area_dict)

income_dict = {
    'California': 75277,
    'Texas': 60629,
    'Illinois': 65030,
    'New York': 71855,
    'Florida': 53267,
}
# The income Series must come from income_dict (not area_dict), otherwise the
# 'income' column is just a copy of the 'area' column.
income = pd.Series(income_dict)

# Combine the three Series into one DataFrame; rows are aligned on the state-name index.
data = pd.DataFrame({'pop': population, 'area': area, 'income': income})
# Render the DataFrame as notebook output; `display` is not imported in this file,
# so it relies on the IPython/Jupyter environment providing it.
display(data)

In [None]:
# Write the DataFrame to CSV and to Excel without row labels (index) or column headers.
export_opts = {'index': False, 'header': False}
data.to_csv('./data/output/to_csv_eg.csv', **export_opts)
data.to_excel('./data/output/to_xls_eg.xlsx', sheet_name='sheet_name', **export_opts)

# DataFrame select & indexing
<details>
    <summary>dataframe結構圖</summary>
    <img src = './img/creating_dataframe1.png'>
</details>
<details>
    <summary>dataframe的Series結構圖</summary>
    <details>
        <summary>columns' series</summary>
        <img src = './img/dataSER-1.png'>
    </details>
    <details>
        <summary>index labels' series</summary>
        <img src = './img/Untitled.png'>
    </details>
</details>



#### 使用 Dictionary 風格來存取(透過欄名稱索引) 

In [None]:
# Dictionary-style access: select the 'area' column by its name.
data['area']

#### Dictionary 風格的語法也可以用來修改物件，或是建立一個新欄位 

In [None]:
# Dictionary-style assignment creates a new 'density' column, computed as pop / area.
data['density'] = data['pop'] / data['area']

#### DataFrame的屬性

In [None]:
# Column labels of the DataFrame.
data.columns

In [None]:
# Row labels (the index) of the DataFrame.
data.index

In [None]:
# Cell values of the DataFrame as a NumPy array, without the row/column labels.
data.values

#### loc

In [None]:
# loc selects by label: the row labelled 'Texas'.
data.loc['Texas']

In [None]:
# Single cell addressed by row label and column label.
data.loc['Illinois', 'pop']

In [None]:
# Label-based slices: rows up to and including 'Texas', columns up to and
# including 'pop' (loc slices include the end label).
data.loc[:'Texas', :'pop'] 

#### iloc

In [None]:
# iloc selects by integer position: -1 is the last row.
data.iloc[-1]

In [None]:
# Single cell at row position 1, column position 2 (positions are zero-based).
data.iloc[1,2]

In [None]:
# Positional slices: first three rows and first two columns (end-exclusive, like Python slicing).
data.iloc[:3, :2]

#### Masking indexing

In [None]:
# Element-wise comparison yields a boolean Series: True where density exceeds 100.
data['density'] > 100

In [None]:
# Boolean-mask indexing: keep only the rows whose density exceeds 100.
data[data['density'] > 100]

#### Fancy indexing

In [None]:
# Passing a list of column names selects several columns at once, in the order listed.
data[['pop','income','density']]

#### combine masking and fancy indexing as in the following:

In [None]:
# Boolean row mask (converted to a plain array with .values) combined with a list
# of column labels: rows with density above 100, columns 'pop' and 'density'.
data.loc[(data.density > 100).values, ['pop', 'density']]

In [None]:
# Same row mask with iloc, which takes column positions instead of labels.
# NOTE(review): with the column order built in this notebook (pop, area, income, density),
# positions [0, 2] are 'pop' and 'income'; if this was meant to mirror the loc
# example (['pop', 'density']), 'density' would be position 3 -- confirm intent.
data.iloc[(data.density > 100).values, [0,2]]

# JSON & XML

In [None]:
# Parse the JSON file into a DataFrame, then preview up to the first ten rows.
dj = pd.read_json('./data/data.json')
dj.head(10)

In [None]:
import xml.etree.ElementTree as XET
# Load the XML file with ElementTree and take the document's root element.
tree = XET.parse('./data/County_h_10906.xml')
root = tree.getroot()

# For the first three top-level elements only, print the element's tag
# followed by the tag of each of its direct children.
for tag_lv1 in root[:3]:
    print(tag_lv1.tag)
    for tag_lv2 in tag_lv1:
        print(tag_lv2.tag)

In [None]:
# Print (field 1, field 2, field 3) text tuples for the first three records.
# Slice the matched records *before* building tuples so only three records are
# converted, instead of converting every record and discarding the rest.
first_records = root.findall('County_h_10906')[:3]
print([(node.find('欄位1').text, node.find('欄位2').text, node.find('欄位3').text) for node in first_records])

# MySQL

In [None]:
import sqlalchemy as db
import pandas.io.sql as sql
import pandas as pd

# Connection settings
username = 'root'            # database account
password = ''                # database password
host = 'localhost'           # database host
port = '3306'                # database port
database = 'classicmodels'   # database (schema) name
table = 'offices'            # table name

# Create the engine from a MySQL + PyMySQL URL, then open a connection on it.
connection_url = f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}'
engine = db.create_engine(connection_url)
con = engine.connect()

# Read the whole table into a DataFrame and show its last rows.
select_all = f'SELECT * FROM `{database}`.`{table}`;'
df = sql.read_sql(select_all, con)
df.tail()

In [None]:
# One new office record; the dict keys become the DataFrame's column names.
new_office = {
    'officeCode': 8,
    'city': 'Taipei',
    'phone': '1234567891',
    'addressLine1': 'Taipei DaAn',
    'addressLine2': 'Taipei DaAn2',
    'state': 'Taipei',
    'country': 'Taiwan',
    'postalCode': '123',
    'territory': 'Asia',
}
# Wrap the record in a list so it becomes a single-row DataFrame, then preview it.
df_append = pd.DataFrame([new_office])
df_append.tail()

In [None]:
# Append the DataFrame's rows to the existing table; index=False keeps the
# DataFrame index from being written as an extra column.
df_append.to_sql(name=table, con=con, if_exists='append', index=False)

In [None]:
# Read the whole table again and show its last rows.
select_all = f'SELECT * FROM `{database}`.`{table}`;'
df = pd.read_sql(select_all, con)
df.tail()

# 補充

In [None]:
metadata = db.MetaData()
# Reflect the existing 'offices' table from the database into a Table object.
# autoload_with alone triggers reflection; the separate autoload=True flag is
# deprecated in SQLAlchemy 1.4 and no longer accepted in 2.0.
tableoffices = db.Table('offices', metadata, autoload_with=engine)

# DELETE the row(s) whose officeCode equals 8 and run the statement on the open connection.
query = db.delete(tableoffices).where(tableoffices.c.officeCode == 8)
proxy = con.execute(query)
# NOTE(review): SQLAlchemy 2.x connections do not autocommit -- confirm whether an
# explicit commit is required for the installed version for the delete to persist.