-
Notifications
You must be signed in to change notification settings - Fork 3
/
beta_2_manager.py
197 lines (180 loc) · 7.9 KB
/
beta_2_manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import os
import gspread
import json
import sys
from oauth2client import file
from datetime import datetime as dt
import httplib2
import dateparser
import subprocess
import configparser
from insta_harvesters import get_live as insta_get_live
from insta_harvesters import get_account as insta_get_account
from insta_harvesters import get_video as insta_get_video
from tiktok_harvesters_3 import get_tiktok_video as tiktok_get_video
from tiktok_harvesters_selenium3 import get_tiktok_videos as tiktok_get_videos
from facebook_harvesters_2 import get_video as facebook_get_video
from facebook_harvesters_2 import get_videos as facebook_get_videos
from twitter_harvesters import get_tweet as twitter_get_tweet
from twitter_harvesters import get_account as twitter_get_account
from youtube_harvesters import get_video as youtube_get_video
from youtube_harvesters import get_channel as youtube_get_channel
from youtube_harvesters import get_user as youtube_get_user
from youtube_harvesters import get_playlist as youtube_get_playlist
from vimeo_harvesters import get_video as vimeo_get_video
from vimeo_harvesters import get_channel as vimeo_get_channel
secrets_and_credentials_fold = 'C:\Source\sercrets_and_credentials'
script_folder = os.getcwd()
config = configparser.ConfigParser()
config.read(os.path.join(secrets_and_credentials_fold,"secret"))
#print(os.path.join(secrets_and_credentials_fold,"secret"))
## spreadsheet
sprsh = config.get("social_media_harvester","google_spreadsheet_key")
## credentials
client_secrets_file = os.path.join(secrets_and_credentials_fold, "client_secrets.json")
store = file.Storage(client_secrets_file )
creds = store.get()
#automatically renews google credentials
if creds.access_token_expired:
creds.refresh(httplib2.Http())
#authorizing credentials
c = gspread.authorize(creds)
#gets spreadsheet
gs = c.open_by_key(sprsh)
#gets sheet by name
ws = gs.get_worksheet(0)
### make storage folder
storage_folder_root = "./harvests"
if not os.path.exists(storage_folder_root):
os.makedirs(storage_folder_root)
if not os.path.exists('my_content_types.txt'):
my_content_types = []
else:
with open('my_content_types.txt') as data:
my_content_types = [x for x in data.read().split('\n') if x != ""]
class Item():
"""
This is the exchange object that is passed between the manager script, and the harvesters.
There is a minimum population of data thats described in the technical_notes.md file in the git
Its designed to be extensible - if there is data a harvester needs to undertake the harvest thats user supplied
it should be in this data object and not set in the harvester.
Its generated from a row of the linked spreadsheet and has some minor additional derived data
Its return from the harvester with a storage location string (actaully a confirmation of sorts the data exists at instanciaton)
and a complete (True|False) flag
"""
def __init__(self, row, row_number):
self.id = row[0]
self.description = row[1]
self.creator = row[2]
self.ready = row[3]
self.category = row[4]
self.location = row[5]
self.content_type = row[6]
self.url = row[7]
if row[8] == "NA":
self.date_range = None
else:
self.date_range = dateparser.parse(row[8])
self.reccuring = row[9]
self.scope = row[10]
self.archived = dateparser.parse(row[11])
self.collected = row[12]
self.responsible = row[13]
self.storage_location = row[14]
self.notes = row[15]
self.completed = False
self.storage_folder = os.path.join(storage_folder_root, self.id)
self.row_number = row_number
self.agent_name = ""
self.archived_start_date = ""
if self.reccuring == "Y":
if self.archived == None:
self.archived_start_date = self.date_range
else:
self.archived_start_date = self.archived
if self.reccuring == "N":
self.archived_start_date = self.date_range
def item_parser(item):
"""
This it function that handles the inferface between the mamanger and harvester.
It takes a properly populated item() class data object and passes it to the approriate harvester based on the contentType string.
New Harvesters are added here.
"""
print("here0")
if item.ready == "Y" and item.collected != "Y":
print("here")
if item.content_type == "InstagramAccount" and item.scontent_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = insta_get_account(item)
elif item.content_type == "InstagramLive" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = insta_get_live(item)
elif item.content_type == "InstagramVideo" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = insta_get_video(item)
elif item.content_type == "TiktokVideo" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = tiktok_get_video(item)
elif item.content_type == "TiktokVideos" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = tiktok_get_videos(item)
elif item.content_type == "FacebookVideo" and item.content_type in my_content_types:
print (f"working osan: {item.id} - {item.content_type}")
item = facebook_get_video(item)
elif item.content_type == "FacebookVideos" and item.content_type in my_content_types:
print (f"working osan: {item.id} - {item.content_type}")
item = facebook_get_videos(item)
elif item.content_type == "TwitterTweet" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = twitter_get_tweet(item)
elif item.content_type == "TwitterAccount" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = twitter_get_account(item)
elif item.content_type == "VimeoVideo" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = vimeo_get_video(item)
elif item.content_type == "VimeoChannel" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = vimeo_get_channel(item)
elif item.content_type == "YoutubeVideo" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = youtube_get_video(item)
elif item.content_type == "YoutubeChannel" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = youtube_get_channel(item)
elif item.content_type == "YoutubeUser" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = youtube_get_user(item)
elif item.content_type == "YoutubePlaylist" and item.content_type in my_content_types:
print (f"working on: {item.id} - {item.content_type}")
item = youtube_get_playlist(item)
### when the havester is finished it returns the item back to this function
### If item.completed is populated True it writes the completion to the sheet
if item.completed:
write_to_spreadsheet(item)
def write_to_spreadsheet(item):
"""
Writes to spreadsheet:
collected (bool - item.completed)
responsible (string - item.agent_name)
storage location (string - item.storage_folder)
Date Archived (string - datetime.now())
"""
ws.update_cell(item.row_number, 12, dt.now().strftime("%d.%m.%Y") )
if item.reccuring == "N":
ws.update_cell(item.row_number, 13, "Y")
else:
ws.update_cell(item.row_number, 13, "N")
ws.update_cell(item.row_number, 14, item.agent_name)
ws.update_cell(item.row_number, 15, item.storage_folder)
def main():
"""
Initiates reading spreadsheet by rows
"""
row_count = ws.row_count
for row_number, row in enumerate(ws.get_all_values()[1:], start = 2):
if row_number==196:#and row_number in ['110025', '110032', '110033', '110039', '100094', '100117', '110164', '110178', '110183', '110184', '110185', '110186', '110187', '110195', '110199', '11200']:
item = Item(row, row_number)
item_parser(item)
if __name__ == "__main__":
main()