# Shows a basic pandas CSV import with explicit zipfile handling
By "explicit" we mean that the file-handling code is visible in this script.

In [None]:
import pandas as pd
import zipfile as z
import pathlib
import os
import shutil

# Record where the notebook is running from; the relative "Data/..." paths
# used further down are resolved against this directory.
current_working_dir = os.getcwd()

# The "OS" environment variable is set on Windows (value "Windows_NT") but is
# usually absent on macOS/Linux, where indexing os.environ directly raises
# KeyError. Fall back to os.name (e.g. "posix") so this cell runs everywhere
# while the Windows value stays unchanged.
OS_name = os.environ.get("OS", os.name)

print("current_working_dir = " + current_working_dir)
print("OS_name = " + OS_name)

In [None]:
# HTTP - load two remote CSV files straight into dataframes
amex_csv_url = "https://raw.githubusercontent.com/stevewatkins17/PythonForDataAnalysis/Jan2020/IpythonNotebooks/Data/demo_AmexTransactions.csv"
tutorial_csv_url = "http://46.101.230.157/dilan/pandas_tutorial_read.csv"
tutorial_columns = ['my_datetime', 'event', 'country', 'user_id', 'source', 'topic']

# header=None: the first line is read as data and the columns are numbered
df2 = pd.read_csv(amex_csv_url, header=None)

# semicolon-separated file; the column names are supplied explicitly
df4 = pd.read_csv(tutorial_csv_url, sep=";", names=tutorial_columns)

# last expression in the cell, so the notebook displays it
df2

In [None]:
# MACBOOK absolute path - read CSV into dataframe

#df1 = pd.read_csv("/Users/stevewatkins/Downloads/CodeLouisville/sw/PythonForDataAnalysis/IpythonNotebooks/Data/demo_AmexTransactions.csv", header=None)

#may need to unzip csv first 
#df3 = pd.read_csv("/Users/stevewatkins/Downloads/CodeLouisville/sw/PythonForDataAnalysis/IpythonNotebooks/Data/trips.csv", header="infer")

In [None]:
# WINDOWS absolute path - read CSV into dataframe
#df1 = pd.read_csv("C:/Users/stevewatkins/Downloads/temp2/PythonForDataAnalysis/demo_AmexTransactions.csv", header=None)
#df3 = pd.read_csv("C:/Users/stevewatkins/Downloads/temp2/PythonForDataAnalysis/trips.csv", header="infer")

In [None]:
# Folder that holds the zipped CSV and receives the extracted files.
p = pathlib.Path("Data")

# Build the archive path with pathlib so the separator matches the host OS
# ("Data/trips.csv.zip" on macOS/Linux, "Data\trips.csv.zip" on Windows)
# without a hand-written per-OS branch.
pathfile_to_unzip = str(p / "trips.csv.zip")

# Snapshot the sub-directories present before extraction; a later cell diffs
# this against the post-extraction listing to find what the unzip created.
dirs_before_unz = [x for x in p.iterdir() if x.is_dir()]

# The context manager closes the archive even if extractall() raises,
# which the manual open/extract/close sequence did not guarantee.
with z.ZipFile(pathfile_to_unzip, "r") as zf:
    zf.extractall(p)

In [None]:
# WINDOWS or MAC - the same relative paths are used on both - read CSV into dataframe
amex_csv = "Data/demo_AmexTransactions.csv"
trips_csv = "Data/trips.csv"

# header=None: first line is data; header="infer": pandas infers the header row
df1 = pd.read_csv(amex_csv, header=None)
df3 = pd.read_csv(trips_csv, header="infer")

In [None]:
## view results
## (uncomment any one of the alternatives below to try it)

# first / last rows
#df1.head(10)
#df1.tail(2)

# five randomly chosen rows of the trips data
df3.sample(n=5)

# column selection (dataframe)
#df4[["my_datetime", "source"]]

# single column (series)
#df4['user_id']

# row filter and a larger random sample
#df4[df4.source == 'Reddit']
#df4.sample(100)

# simple aggregates
#df4[['source']].count()
#df4[['source']].sum()
#df4[['user_id']].sum()

# whole dataframes
#df1
#df3


In [None]:
# Clean-up with os: remove the big extracted csv when it is present
target_file = "Data/trips.csv"

if not os.path.isfile(target_file):
    # nothing to remove (never extracted, or already cleaned up)
    print(f'"{target_file}" not found')
else:
    os.remove(target_file)
    print("File deleted")



In [None]:

# list the sub-directories of the data folder again, now that the unzip has run
dirs_after_unz = [entry for entry in p.iterdir() if entry.is_dir()]

# set difference: whatever is in the "after" listing but not in the "before"
# listing was created in between, i.e. the dir(s) we want to delete
dir_unz = list(set(dirs_after_unz).difference(dirs_before_unz))

In [None]:
# using shutil, we delete the dir(s) created by extracting from the zipfile

## target_dir hard-coded
#target_dir = pathlib.Path.cwd() / "Data" / "__MACOSX"

## target_dir derived from code
# dir_unz is empty when the extraction created no new directory (for example
# when it already existed before the unzip); indexing dir_unz[0] would then
# raise IndexError, so that case is reported instead.
target_dir = None
if not dir_unz:
    print("no new dir was created by the unzip; nothing to delete")

# Remove every newly created dir; sorted() makes the order deterministic,
# since dir_unz comes from a set difference and has no defined order.
for target_dir in sorted(dir_unz):
    try:
        shutil.rmtree(target_dir)
    except FileNotFoundError:
        print(f'dir "{target_dir}" does not exist')
    except OSError as e:
        # e.g. permission problems: report the real cause rather than
        # claiming the dir is missing
        print(f"Error: {target_dir} : {e.strerror}")
    else:
        print(f'dir "{target_dir}" deleted')

## another method (only works for an empty dir)
#pathlib.Path.rmdir(target_dir)


