Skip to content

Commit a633552

Browse files
Merge pull request avinashkranjan#142 from Siddhant-K-code/master
PDF To CSV Converter
2 parents 0e20c53 + 99bba10 commit a633552

File tree

4 files changed

+104
-0
lines changed

4 files changed

+104
-0
lines changed

PDF To CSV Converter/main.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
print("[-+-] starting pdf_csv.py...")
2+
print("[-+-] import a pdf and convert it to a csv")
3+
# -----------------------------------------------------------------------------
4+
print("[-+-] importing required packages for pdf_csv.py...")
5+
import os
6+
import tabula # simple wrapper for tabula-java, read tables from PDF into csv
7+
#from modules.defaults import df # local module
8+
print("[-+-] pdf_csv.py packages imported! \n")
9+
#-----------------------------------------------------------------------------
10+
11+
# -----------------------------------------------------------------------------
12+
def pdf_csv():  # convert pdf to csv
    """Convert a PDF found in the current working directory into a CSV file.

    The PDF is looked up as ``sample1.pdf`` in ``os.getcwd()``.  If that file
    does not exist, the directory is scanned for ``*.pdf`` files and the
    conversion only proceeds when exactly one is found.  The output is always
    written to ``sample1.csv`` in the same directory, via
    ``tabula.convert_into(..., output_format="csv", pages="all")``.

    Progress is reported with ``print``; the function takes no arguments and
    returns ``None``.  An ``IOError`` raised by the conversion is reported on
    stdout rather than propagated.
    """
    print("[-+-] default filenames:")
    filename = "sample1"
    pdf = filename + ".pdf"
    csv = filename + ".csv"
    print(pdf)
    print(csv + "\n")

    print("[-+-] default directory:")
    print("[-+-] (based on current working directory of python file)")
    defaultdir = os.getcwd()
    print(defaultdir + "\n")

    print("[-+-] default file paths:")
    pdf_path = os.path.join(defaultdir, pdf)
    csv_path = os.path.join(defaultdir, csv)
    print(pdf_path)
    print(csv_path + "\n")

    print("[-+-] looking for default pdf...")
    if os.path.exists(pdf_path):  # check if the default pdf exists
        print("[-+-] pdf found: " + pdf + "\n")
        pdf_flag = True
    else:
        print("[-+-] looking for another pdf...")
        # Use a dedicated loop name so the directory path is not shadowed.
        arr_pdf = [name for name in os.listdir(defaultdir) if name.endswith(".pdf")]
        if len(arr_pdf) == 1:  # there has to be exactly one pdf in the directory
            print("[-+-] pdf found: " + arr_pdf[0] + "\n")
            pdf_path = os.path.join(defaultdir, arr_pdf[0])
            pdf_flag = True
        elif len(arr_pdf) > 1:  # there is more than one pdf in the directory
            print("[-+-] more than 1 pdf found, exiting script!")
            pdf_flag = False
            # TODO add option to select from available pdfs
        else:
            print("[-+-] pdf cannot be found, exiting script!")
            pdf_flag = False

    if pdf_flag:
        # check if csv exists at the default file path
        # if csv does not exist create a blank file at the default path
        try:
            print("[-+-] looking for default csv...")
            # Open only to probe readability; the context manager closes the
            # handle immediately instead of leaking it.
            with open(csv_path, "r"):
                pass
            print("[-+-] csv found: " + csv + "\n")
        except IOError:
            print("[-+-] did not find csv at default file path!")
            print("[-+-] creating a blank csv file: " + csv + "... \n")
            # Create the empty file and release the handle before tabula
            # writes to the same path.
            with open(csv_path, "w"):
                pass

        print("[-+-] converting pdf to csv...")
        # print("[-+-] pdf to csv conversion suppressed! \n")
        try:
            tabula.convert_into(pdf_path, csv_path, output_format="csv", pages="all")
        except IOError:
            print("[-+-] pdf to csv conversion failed!")
        else:
            print("[-+-] pdf to csv conversion complete!\n")
            # Only advertise the output location when the conversion worked.
            print("[-+-] converted csv file can be found here: " + csv_path + "\n")

    print("[-+-] finished pdf_csv.py successfully!")
73+
# -----------------------------------------------------------------------------
74+
75+
# -----------------------------------------------------------------------------
76+
pdf_csv() # run the program: start the pdf-to-csv conversion as soon as this statement executes
77+
# -----------------------------------------------------------------------------
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tabula-py

PDF To CSV Converter/sample1.csv

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Date,Open,High,Low,Close / Last,Volume
2+
01/04/2017,62.48,62.75,62.12,62.3,"21,325,140"
3+
01/03/2017,62.79,62.84,62.125,62.58,"20,655,190"
4+
12/30/2016,62.96,62.99,62.03,62.14,"25,575,720"
5+
12/29/2016,62.86,63.2,62.73,62.9,"10,248,460"
6+
12/28/2016,63.4,63.4,62.83,62.99,"14,348,340"
7+
12/27/2016,63.21,64.07,63.21,63.28,"11,743,650"
8+
12/23/2016,63.45,63.54,62.8,63.24,"12,399,540"
9+
12/22/2016,63.84,64.1,63.405,63.55,"22,175,270"
10+
12/21/2016,63.43,63.7,63.12,63.54,"17,084,370"
11+
12/20/2016,63.69,63.8,63.025,63.54,"26,017,470"
12+
12/19/2016,62.56,63.77,62.42,63.62,"34,318,500"
13+
12/16/2016,62.95,62.95,62.115,62.3,"42,452,660"
14+
Date,Open,High,Low,Close / Last,Volume
15+
01/04/2017,117.55,119.66,117.29,118.69,"19,594,560"
16+
01/03/2017,116.03,117.84,115.51,116.86,"20,635,600"
17+
12/30/2016,116.595,116.83,114.7739,115.05,"18,668,290"
18+
12/29/2016,117,117.531,116.06,116.35,"9,925,082"
19+
12/28/2016,118.19,118.25,116.65,116.92,"11,985,740"
20+
12/27/2016,116.96,118.68,116.864,118.01,"12,034,590"
21+
12/23/2016,117,117.56,116.3,117.27,"10,885,030"
22+
12/22/2016,118.86,118.99,116.93,117.4,"16,226,770"
23+
12/21/2016,118.92,119.2,118.48,119.04,"10,747,610"
24+
12/20/2016,119.5,119.77,118.8,119.09,"13,673,570"
25+
12/19/2016,119.85,120.36,118.51,119.24,"15,871,360"
26+
12/16/2016,120.9,121.5,119.27,119.87,"25,316,220"

PDF To CSV Converter/sample1.pdf

23.9 KB
Binary file not shown.

0 commit comments

Comments
 (0)