-
Notifications
You must be signed in to change notification settings - Fork 3
/
RelDBProcessor.py
133 lines (110 loc) · 4.84 KB
/
RelDBProcessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from QueryProcessor import *
from sqlite3 import connect
import pandas as pd
import json
class AnnotationProcessor(Processor):
    """Upload annotation CSV data into the SQLite database.

    The database location is read from ``self.dbPathOrUrl`` and handed to
    ``sqlite3.connect``.
    """

    def __init__(self):
        super().__init__()

    def uploadData(self, path: str) -> bool:
        """Load the CSV at *path* into the ``Annotation`` and ``Image`` tables.

        The ``id``, ``body``, ``target`` and ``motivation`` columns are read
        as strings (empty cells stay empty strings rather than NaN). The whole
        frame replaces the ``Annotation`` table; the ``body`` column, renamed
        to ``id``, replaces the ``Image`` table.

        Args:
            path: Location of the CSV file to import.

        Returns:
            True when both tables were written, False if any error occurred
            (the error is printed).
        """
        try:
            annotations = pd.read_csv(
                path,
                keep_default_na=False,
                dtype={
                    "id": "string",
                    "body": "string",
                    "target": "string",
                    "motivation": "string",
                },
            )
            images = annotations[["body"]].rename(columns={"body": "id"})

            # The sqlite3 connection context manager only commits/rolls back;
            # it does not close the connection, so close it explicitly.
            conn = connect(self.dbPathOrUrl)
            try:
                with conn:
                    # Declare columns as TEXT: in SQLite a declared type of
                    # "string" yields NUMERIC affinity, which would coerce
                    # numeric-looking identifiers (e.g. "0012") to numbers.
                    annotations.to_sql(
                        "Annotation",
                        conn,
                        if_exists="replace",
                        index=False,
                        dtype={
                            "id": "TEXT",
                            "body": "TEXT",
                            "target": "TEXT",
                            "motivation": "TEXT",
                        },
                    )
                    images.to_sql(
                        "Image",
                        conn,
                        if_exists="replace",
                        index=False,
                        dtype={"id": "TEXT"},
                    )
            finally:
                conn.close()
            return True
        except Exception as e:
            # Callers rely on the boolean result, so report and signal failure
            # instead of propagating.
            print(e)
            return False
class MetadataProcessor(Processor):
    """Upload entity metadata CSV data into the SQLite database.

    The database location is read from ``self.dbPathOrUrl`` and handed to
    ``sqlite3.connect``.
    """

    def __init__(self):
        super().__init__()

    def uploadData(self, path: str) -> bool:
        """Load the CSV at *path* into the ``Metadata`` and ``EntityCreator`` tables.

        The ``id``, ``title`` and ``creator`` columns are read as strings
        (empty cells stay empty strings rather than NaN). The whole frame
        replaces the ``Metadata`` table. Rows with a non-empty ``creator`` are
        split on ``'; '`` and written, one creator per row, to the
        ``EntityCreator`` table with columns ``id`` and ``creator``.

        Args:
            path: Location of the CSV file to import.

        Returns:
            True when both tables were written, False if any error occurred
            (the error is printed).
        """
        try:
            metadata = pd.read_csv(
                path,
                keep_default_na=False,
                dtype={
                    "id": "string",
                    "title": "string",
                    "creator": "string",
                },
            )

            # Take an explicit copy so the column assignment below operates on
            # an independent frame rather than on a slice of ``metadata``.
            creators = metadata.loc[
                metadata["creator"] != "", ["id", "creator"]
            ].copy()
            creators["creator"] = creators["creator"].str.split("; ")
            creators = creators.explode("creator")

            # The sqlite3 connection context manager only commits/rolls back;
            # it does not close the connection, so close it explicitly.
            conn = connect(self.dbPathOrUrl)
            try:
                with conn:
                    metadata.to_sql(
                        "Metadata", conn, if_exists="replace", index=False
                    )
                    creators.to_sql(
                        "EntityCreator", conn, if_exists="replace", index=False
                    )
            finally:
                conn.close()
            return True
        except Exception as e:
            # Callers rely on the boolean result, so report and signal failure
            # instead of propagating.
            print(e)
            return False
class RelationalQueryProcessor(QueryProcessor):
    """Run read-only queries against the SQLite database.

    The database location is read from ``self.dbPathOrUrl`` and handed to
    ``sqlite3.connect``. Every public method returns a pandas DataFrame.
    """

    def __init__(self):
        super().__init__()

    def _runSelect(self, query: str, params=None) -> pd.DataFrame:
        """Execute *query* with bound *params* and return the rows as a DataFrame.

        Values are passed through ``params`` (``?`` placeholders) instead of
        being formatted into the SQL text, so quotes or SQL fragments in
        caller-supplied values cannot alter the statement.
        """
        conn = connect(self.dbPathOrUrl)
        try:
            return pd.read_sql(query, conn, params=params)
        finally:
            # ``with connect(...)`` would not close the connection, only
            # commit/rollback; close it explicitly to avoid leaking handles.
            conn.close()

    def getAllAnnotations(self) -> pd.DataFrame:
        """Return every distinct row of the ``Annotation`` table."""
        return self._runSelect(
            """
            SELECT DISTINCT id, body, target, motivation
            FROM Annotation
            """
        )

    def getAllImages(self) -> pd.DataFrame:
        """Return every distinct ``id`` of the ``Image`` table."""
        return self._runSelect(
            """
            SELECT DISTINCT id FROM Image
            """
        )

    def getAnnotationsWithBody(self, bodyId: str) -> pd.DataFrame:
        """Return the distinct annotations whose ``body`` equals *bodyId*."""
        return self._runSelect(
            """
            SELECT DISTINCT id, body, target, motivation
            FROM Annotation
            WHERE body = ?
            """,
            (str(bodyId),),
        )

    def getAnnotationsWithBodyAndTarget(
        self, bodyId: str, targetId: str
    ) -> pd.DataFrame:
        """Return the distinct annotations matching both *bodyId* and *targetId*."""
        return self._runSelect(
            """
            SELECT DISTINCT id, body, target, motivation
            FROM Annotation
            WHERE body = ? AND target = ?
            """,
            (str(bodyId), str(targetId)),
        )

    def getAnnotationsWithTarget(self, targetId: str) -> pd.DataFrame:
        """Return the distinct annotations whose ``target`` equals *targetId*."""
        return self._runSelect(
            """
            SELECT DISTINCT id, body, target, motivation
            FROM Annotation
            WHERE target = ?
            """,
            (str(targetId),),
        )

    def getEntitiesWithCreator(self, creatorName: str) -> pd.DataFrame:
        """Return the metadata rows of entities having *creatorName* as a creator.

        The match is done against the one-creator-per-row ``EntityCreator``
        table; the returned ``creator`` column is the one stored in
        ``Metadata``.
        """
        return self._runSelect(
            """
            SELECT DISTINCT m.id, m.creator, m.title
            FROM Metadata m
            JOIN EntityCreator ecr ON m.id = ecr.id
            WHERE ecr.creator = ?
            """,
            (str(creatorName),),
        )

    def getEntitiesWithTitle(self, title: str) -> pd.DataFrame:
        """Return the distinct metadata rows whose ``title`` equals *title*."""
        return self._runSelect(
            """
            SELECT DISTINCT id, title, creator
            FROM Metadata
            WHERE title = ?
            """,
            (str(title),),
        )