# Introduction to Pandas

In [118]:
from pathlib import Path
import pandas as pd
from datetime import date
import csv

# Run date as an ISO-format string (yyyy-mm-dd)
today = date.today().isoformat()

TESLA_BING_CSV = Path(r"Tesla Bing.csv")
TESLA_GOOGLE_CSV = Path(r"Tesla Google.csv")

# Both files are read with the same settings:
#   * UTF-16 encoded, tab-separated text
#   * keep_default_na=False: the default NA markers are not converted to
#     missing values, so the text is kept as it appears in the file
#   * dtype="string": every column uses the dedicated string dtype, a new
#     feature of pandas 1.0.0 (previously dtype=object was used for text)
CSV_READ_OPTIONS = {
    "encoding": "UTF-16",
    "sep": "\t",
    "keep_default_na": False,
    "dtype": "string",
}

# df1 holds the Bing file, df2 the Google file
df1 = pd.read_csv(TESLA_BING_CSV, **CSV_READ_OPTIONS)
df2 = pd.read_csv(TESLA_GOOGLE_CSV, **CSV_READ_OPTIONS)

In [119]:
# First rows of the Bing translation before the replacement
print(df1["Chinese (Bing)"].head(5))

# Replace every occurrence of "特斯拉" in the Bing translation with "Tesla ".
# Note that the replacement text ends with a space, so a space follows each
# inserted "Tesla" in the result.
# regex=False treats the pattern as a literal substring rather than a regular
# expression, independent of the default of the installed pandas version.
df1["Chinese (Bing)"] = df1["Chinese (Bing)"].str.replace(
    "特斯拉", "Tesla ", regex=False
)

# The same rows after the replacement
print(df1["Chinese (Bing)"].head(5))

0                                特斯拉的使命是加速世界向可持续能源的过渡。
1    特斯拉成立于2003年由一群工程师谁想证明, 人们不需要妥协, 以驱动电动-电动车可以更好,...
2          如今, 特斯拉不仅建造了全电动汽车, 而且还建立了可无限扩展的清洁能源生产和存储产品。
3                 特斯拉相信, 世界停止依赖化石燃料的速度越快, 迈向零排放的未来就越好。
4                 推出于 2008年, 敞篷跑车推出了特斯拉的尖端电池技术和电动动力总成。
Name: Chinese (Bing), dtype: string
0                             Tesla 的使命是加速世界向可持续能源的过渡。
1    Tesla 成立于2003年由一群工程师谁想证明, 人们不需要妥协, 以驱动电动-电动车可以...
2       如今, Tesla 不仅建造了全电动汽车, 而且还建立了可无限扩展的清洁能源生产和存储产品。
3              Tesla 相信, 世界停止依赖化石燃料的速度越快, 迈向零排放的未来就越好。
4              推出于 2008年, 敞篷跑车推出了Tesla 的尖端电池技术和电动动力总成。
Name: Chinese (Bing), dtype: string


In [120]:
# Copy the Bing translation column from df1 into the Google frame (df2).
# The assignment aligns the two frames on their row index
# NOTE(review): assumes both files list the same rows in the same order - confirm.
bing_column = "Chinese (Bing)"
df2[bing_column] = df1[bing_column]

# List the columns to confirm the new one is present
print(df2.columns)

Index(['English', 'Chinese (Google)', 'Character Count (English)',
       'Character Count (Chinese)', 'Translation Date', 'Chinese (Bing)'],
      dtype='object')


In [121]:
# Stamp every row with today's date: the single `today` string is
# broadcast to all rows of the new "Update Date" column
update_column = "Update Date"
df2[update_column] = today
print(df2[update_column].head(5))

0    2020-03-23
1    2020-03-23
2    2020-03-23
3    2020-03-23
4    2020-03-23
Name: Update Date, dtype: object


In [122]:
# "English" column before filtering
print(df2["English"].head(5))

# Keep only the rows whose "English" text does NOT contain "electric",
# i.e. drop every row that mentions it. The test is a case-sensitive,
# literal substring match (regex=False).
# `~` negates the boolean mask element-wise, so rows where the match is
# True are the ones left out of `subset`.
# NOTE(review): confirm this is the intended direction; remove the `~` to
# keep only the rows that do contain "electric" instead.
mentions_electric = df2["English"].str.contains("electric", regex=False)
subset = df2[~mentions_electric]
print(subset["English"].head(5))

0    Tesla’s mission is to accelerate the world’s t...
1    Tesla was founded in 2003 by a group of engine...
2    Today, Tesla builds not only all-electric vehi...
3    Tesla believes the faster the world stops rely...
4    Launched in 2008, the Roadster unveiled Tesla’...
Name: English, dtype: string
0     Tesla’s mission is to accelerate the world’s t...
3     Tesla believes the faster the world stops rely...
7                28 seconds as measured by Motor Trend.
8     In 2015, Tesla expanded its product line with ...
10    Soon after, Tesla unveiled the safest, most co...
Name: English, dtype: string


In [123]:
# Save the filtered rows to "myCSV.csv": UTF-16 encoded, tab-delimited,
# without the index column, and with quotes around every field
export_options = {
    "encoding": "UTF-16",
    "sep": "\t",
    "index": False,
    "quoting": csv.QUOTE_ALL,
}
subset.to_csv("myCSV.csv", **export_options)