From 670196b804e0b9455e1c35e5db38d19ccc57a597 Mon Sep 17 00:00:00 2001 From: Arunmozhi Date: Fri, 1 Jul 2011 22:36:25 +0530 Subject: [PATCH] dont know state! uploading finished --- app.yaml | 8 ++++++++ bulkloader.yaml | 43 +++++++++++++++++++++++++++++++++++++++++++ main.py | 20 ++++++++++---------- 3 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 bulkloader.yaml diff --git a/app.yaml b/app.yaml index e8ff283..e491a6c 100644 --- a/app.yaml +++ b/app.yaml @@ -3,6 +3,14 @@ version: 1 runtime: python api_version: 1 + handlers: +- url: /admin/remote_api + script: $PYTHON_LIB/google/appengine/ext/remote_api/handler.py + login: admin + - url: .* script: main.py + +builtins: +- datastore_admin: on \ No newline at end of file diff --git a/bulkloader.yaml b/bulkloader.yaml new file mode 100644 index 0000000..5e34bd7 --- /dev/null +++ b/bulkloader.yaml @@ -0,0 +1,43 @@ +# Autogenerated bulkloader.yaml file. +# You must edit this file before using it. TODO: Remove this line when done. +# At a minimum address the items marked with TODO: +# * Fill in connector and connector_options +# * Review the property_map. +# - Ensure the 'external_name' matches the name of your CSV column, +# XML tag, etc. +# - Check that __key__ property is what you want. Its value will become +# the key name on import, and on export the value will be the Key +# object. If you would like automatic key generation on import and +# omitting the key on export, you can remove the entire __key__ +# property from the property map. + +# If you have module(s) with your model classes, add them here. Also +# change the kind properties to model_class. +python_preamble: +- import: base64 +- import: re +- import: google.appengine.ext.bulkload.transform +- import: google.appengine.ext.bulkload.bulkloader_wizard +- import: google.appengine.ext.db +- import: google.appengine.api.datastore +- import: google.appengine.api.users + +transformers: +- kind: Permission + connector: csv + + property_map: + - property: __key__ + external_name: key + export_transform: transform.key_id_or_name_as_string + + - property: id + external_name: id + export_transform: transform.key_id_or_name_as_string + + - property: link + external_name: link + + - property: text + external_name: text + import_transform: db.Text \ No newline at end of file diff --git a/main.py b/main.py index 20a8e24..c93eb9e 100644 --- a/main.py +++ b/main.py @@ -25,11 +25,12 @@ from google.appengine.ext import db - +class Test(db.Model): + lis = db.TextProperty() class Hook(db.Model): text = db.TextProperty() - page = db.LinkProperty() + page = db.StringProperty() category = db.StringProperty() # Remove Me later #projects = db.ListProperty(db.key, default=None) @@ -42,21 +43,20 @@ def get(self): f = urlfetch.fetch("http://en.wikipedia.org/wiki/Wikipedia:Recent_additions") soup = BeautifulSoup.BeautifulSoup(f.content) lis = soup.findAll("li", attrs={"style":"-moz-float-edge: content-box"}) + tet = Test() + tet.lis = lis.__str__() + tet.put() for li in lis: + dbhook = Hook() try: link = li.b.a["href"] except TypeError: link = li.find("a")["href"] - link = link.replace("/wiki/","") - hook = { "text" : li.text, "link" : link } - #store it in DB - dbhook = Hook() - dbhook.text = hook["text"] - dbhook.page = "http://en.wikipedia.org/wiki/"+hook["link"] + dbhook.text = str(li) + dbhook.page = link.replace("/wiki/","") dbhook.category = "June" dbhook.put() - self.response.out.write(unicode(hook["link"])) - self.response.out.write("
") + self.response.out.write("Done!") def main():