Permalink
Browse files

Don't know state! Uploading finished.

  • Loading branch information...
1 parent 825b07c commit 670196b804e0b9455e1c35e5db38d19ccc57a597 @tecoholic committed Jul 1, 2011
Showing with 61 additions and 10 deletions.
  1. +8 −0 app.yaml
  2. +43 −0 bulkloader.yaml
  3. +10 −10 main.py
View
@@ -3,6 +3,14 @@ version: 1
runtime: python
api_version: 1
+
handlers:
+- url: /admin/remote_api
+ script: $PYTHON_LIB/google/appengine/ext/remote_api/handler.py
+ login: admin
+
- url: .*
script: main.py
+
+builtins:
+- datastore_admin: on
View
@@ -0,0 +1,43 @@
+# Autogenerated bulkloader.yaml file.
+# You must edit this file before using it. TODO: Remove this line when done.
+# At a minimum address the items marked with TODO:
+# * Fill in connector and connector_options
+# * Review the property_map.
+# - Ensure the 'external_name' matches the name of your CSV column,
+# XML tag, etc.
+# - Check that __key__ property is what you want. Its value will become
+# the key name on import, and on export the value will be the Key
+# object. If you would like automatic key generation on import and
+# omitting the key on export, you can remove the entire __key__
+# property from the property map.
+
+# If you have module(s) with your model classes, add them here. Also
+# change the kind properties to model_class.
+python_preamble:
+- import: base64
+- import: re
+- import: google.appengine.ext.bulkload.transform
+- import: google.appengine.ext.bulkload.bulkloader_wizard
+- import: google.appengine.ext.db
+- import: google.appengine.api.datastore
+- import: google.appengine.api.users
+
+transformers:
+- kind: Permission
+ connector: csv
+
+ property_map:
+ - property: __key__
+ external_name: key
+ export_transform: transform.key_id_or_name_as_string
+
+ - property: id
+ external_name: id
+ export_transform: transform.key_id_or_name_as_string
+
+ - property: link
+ external_name: link
+
+ - property: text
+ external_name: text
+ import_transform: db.Text
View
20 main.py
@@ -25,11 +25,12 @@
from google.appengine.ext import db
-
+class Test(db.Model):
+ lis = db.TextProperty()
class Hook(db.Model):
text = db.TextProperty()
- page = db.LinkProperty()
+ page = db.StringProperty()
category = db.StringProperty() # Remove Me later
#projects = db.ListProperty(db.key, default=None)
@@ -42,21 +43,20 @@ def get(self):
f = urlfetch.fetch("http://en.wikipedia.org/wiki/Wikipedia:Recent_additions")
soup = BeautifulSoup.BeautifulSoup(f.content)
lis = soup.findAll("li", attrs={"style":"-moz-float-edge: content-box"})
+ tet = Test()
+ tet.lis = lis.__str__()
+ tet.put()
for li in lis:
+ dbhook = Hook()
try:
link = li.b.a["href"]
except TypeError:
link = li.find("a")["href"]
- link = link.replace("/wiki/","")
- hook = { "text" : li.text, "link" : link }
- #store it in DB
- dbhook = Hook()
- dbhook.text = hook["text"]
- dbhook.page = "http://en.wikipedia.org/wiki/"+hook["link"]
+ dbhook.text = str(li)
+ dbhook.page = link.replace("/wiki/","")
dbhook.category = "June"
dbhook.put()
- self.response.out.write(unicode(hook["link"]))
- self.response.out.write("<br />")
+ self.response.out.write("Done!")
def main():

0 comments on commit 670196b

Please sign in to comment.