Skip to content

Commit

Permalink
added ubuntu packages as fathead
Browse files Browse the repository at this point in the history
  • Loading branch information
mrshu committed Jul 12, 2012
1 parent f70ca20 commit 11fa7e0
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 0 deletions.
6 changes: 6 additions & 0 deletions ubuntu_pkgs/README.txt
@@ -0,0 +1,6 @@
Ubuntu Packages plugin for DuckDuckGo

Dependencies:

Python 2.7
gzip (should be included in Python)
3 changes: 3 additions & 0 deletions ubuntu_pkgs/fetch.sh
@@ -0,0 +1,3 @@
#!/bin/bash

# Fetch the gzipped plain-text list of all packages in the "precise" release
# into the download/ directory. --timestamping makes wget compare timestamps
# with any local copy instead of unconditionally re-downloading.
wget --quiet --timestamping --directory-prefix=download \
    'http://packages.ubuntu.com/precise/allpackages?format=txt.gz'
15 changes: 15 additions & 0 deletions ubuntu_pkgs/meta.txt
@@ -0,0 +1,15 @@
# This is the name of the source as people would refer to it, e.g. Wikipedia or PerlDoc
Name: Ubuntu Packages

# This is the base domain where the source pages are located.
Domain: packages.ubuntu.com

# This is what gets put in quotes next to the source
# It can be blank if it is a source with completely general info spanning many types of topics like Facebook.
Type: Ubuntu Package

# Whether the source is from MediaWiki (1) or not (0).
MediaWiki: 0

# Keywords used to trigger (or prefer) the source over others.
Keywords: ubuntu package,deb
80 changes: 80 additions & 0 deletions ubuntu_pkgs/parse.py
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-

import logging
import os
import gzip
import string

# Configure logging once at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() without a name returns the root logger; the rest of this
# module logs through it.
logger = logging.getLogger()

class Package(object):
    """Hold the data for one Ubuntu package and render it as an output row.

    Attributes:
        name: package name.
        info: short description of the package.
        reference: URL of the package's page.

    ``str(package)`` yields a single tab-separated record with eight
    columns (see ``__str__``).
    """

    def __init__(self, name, info, reference):
        self.name = name
        self.info = info
        self.reference = reference

    def __repr__(self):
        return '%s(%r, %r, %r)' % (
            type(self).__name__, self.name, self.info, self.reference)

    @staticmethod
    def _clean(value):
        """Return *value* with tab, CR and LF replaced by single spaces.

        The record is tab-separated and written one per line, so any of
        these characters inside a field would shift columns or split the
        record in two.
        """
        for separator in ('\t', '\r', '\n'):
            value = value.replace(separator, ' ')
        return value

    def __str__(self):
        """Return the package as one tab-separated record (no newline)."""
        fields = [
            self._clean(self.name),       # $page
            '',                           # $namespace
            self._clean(self.reference),  # $url
            self._clean(self.info),       # $description
            '',                           # $synopsis (code)
            '',                           # $details
            'A',                          # $type
            '',                           # $lang
        ]
        return '\t'.join(fields)


class Parser(object):
    """Read a gzipped plain-text package listing and build Package objects.

    The listing is opened on construction and its leading header lines are
    skipped immediately. Call ``get_packages()`` to populate
    ``self.packages``. The underlying file stays open until ``close()`` is
    called; the parser can also be used as a context manager.
    """

    UBUNTU_PKGS_URL = 'http://packages.ubuntu.com'

    # Number of lines at the top of the listing that are skipped before the
    # first package entry is read.
    HEADER_LINES = 6

    # Characters allowed to survive in a description; built once so the
    # per-character membership test is a set lookup.
    _PRINTABLE = frozenset(string.printable)

    def __init__(self, input='download/allpackages?format=txt.gz'):
        """Open the gzipped listing at *input* and skip its header lines.

        Raises whatever ``gzip.open`` / ``readline`` raise for a missing or
        invalid file; the handle is closed before the error propagates.
        """
        self.packages = []
        self.input = gzip.open(input, 'rb')
        try:
            for _ in range(self.HEADER_LINES):
                self.input.readline()
        except Exception:
            # Do not leak the handle when the file turns out to be unreadable.
            self.input.close()
            raise

    def close(self):
        """Close the underlying gzip file."""
        self.input.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    @staticmethod
    def _parse_line(line):
        """Split one listing line into ``(name, info)``.

        Returns None for blank lines. *line* may be text or bytes; bytes are
        decoded as UTF-8 with undecodable sequences replaced (they are
        dropped again by the printable-character filter below).
        """
        # On Python 3 a gzip file opened in 'rb' mode yields bytes; on
        # Python 2 ``bytes is str`` and the line is used unchanged.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8', 'replace')

        line = line.rstrip('\n')
        if not line.strip():
            return None

        data = line.split(' ')
        name = data[0]

        # Field selection is kept exactly as originally written: everything
        # from the fourth space-separated token on, minus its first token
        # when more than one remains.
        # NOTE(review): confirm against the real listing format.
        info = data[3:]
        if len(info) > 1:
            info = ' '.join(info[1:])
        else:
            info = ' '.join(info)

        # fix for agda-bin package; removing non-ascii characters
        info = ''.join(ch for ch in info if ch in Parser._PRINTABLE)

        return name, info

    def get_packages(self):
        """Parse the remaining lines of the listing into ``self.packages``.

        ``self.packages`` is reset on every call. Reading continues from the
        current file position, so a second call on the same parser finds no
        further lines.
        """
        self.packages = []
        for line in self.input:
            parsed = self._parse_line(line)
            if parsed is None:
                continue
            name, info = parsed

            reference = self.UBUNTU_PKGS_URL + '/' + name

            self.packages.append(Package(name, info, reference))

            logger.info('Parsed package %s', name)

if __name__ == '__main__':
    # Parse the downloaded listing, then release the gzip handle whether or
    # not parsing succeeded.
    parser = Parser()
    try:
        parser.get_packages()
    finally:
        parser.input.close()

    # One tab-separated record per line. str(package) is written directly:
    # calling .encode('utf-8') first yields bytes on Python 3, which can
    # neither be concatenated with '\n' nor written to a text-mode file.
    with open('output.txt', 'w') as output:
        for package in parser.packages:
            output.write(str(package) + '\n')
            logger.info('Package added to output: %s', package.name)
2 changes: 2 additions & 0 deletions ubuntu_pkgs/parse.sh
@@ -0,0 +1,2 @@
#!/bin/bash
# Run the package-list parser. parse.py is referenced by a relative path, so
# this script has to be started from the directory that contains it.
python parse.py
Empty file added ubuntu_pkgs/queries.txt
Empty file.

0 comments on commit 11fa7e0

Please sign in to comment.