forked from duckduckgo/zeroclickinfo-fathead
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
106 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
Ubuntu Packages plugin for DuckDuckGo | ||
|
||
Dependencies: | ||
|
||
Python 2.7 | ||
gzip (should be included in Python) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Fetch the gzipped plain-text "allpackages" listing for Ubuntu precise into
# the download/ directory. --timestamping makes wget skip the transfer when
# the local copy is already up to date.
wget --quiet --directory-prefix=download --timestamping \
    'http://packages.ubuntu.com/precise/allpackages?format=txt.gz'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# This is the name of the source as people would refer to it, e.g. Wikipedia or PerlDoc | ||
Name: Ubuntu Packages | ||
|
||
# This is the base domain where the source pages are located. | ||
Domain: packages.ubuntu.com | ||
|
||
# This is what gets put in quotes next to the source | ||
# It can be blank if it is a source with completely general info spanning many types of topics like Facebook. | ||
Type: Ubuntu Package | ||
|
||
# Whether the source is from MediaWiki (1) or not (0). | ||
MediaWiki: 0 | ||
|
||
# Keywords used to trigger (or prefer) the source over others. | ||
Keywords: ubuntu package,deb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import logging | ||
import os | ||
import gzip | ||
import string | ||
|
||
# Show INFO-level progress messages (one is logged per parsed/written package).
logging.basicConfig(level=logging.INFO)
# Use a module-named logger rather than the anonymous root logger so records
# can be attributed to (and filtered by) this script.
logger = logging.getLogger(__name__)
|
||
class Package(object):
    """Hold the information about one Ubuntu package.

    Attributes:
        name: package name (used as the ``$page`` column).
        info: human-readable description (``$description`` column).
        reference: URL of the package page (``$url`` column).
    """

    def __init__(self, name, info, reference):
        self.name = name
        self.info = info
        self.reference = reference

    def __repr__(self):
        return '%s(%r, %r, %r)' % (
            type(self).__name__, self.name, self.info, self.reference)

    @staticmethod
    def _clean(value):
        """Return *value* with row/column separators neutralised.

        The output is one tab-separated row per line, so a literal tab,
        carriage return or newline inside a field would shift columns or
        split the row. Trailing line breaks are dropped; any remaining
        separator character is replaced by a single space.
        """
        value = value.rstrip('\r\n')
        for separator in ('\t', '\r', '\n'):
            value = value.replace(separator, ' ')
        return value

    def __str__(self):
        """Return the package as one tab-separated output row (no newline)."""
        fields = [
            self.name,       # $page
            '',              # $namespace
            self.reference,  # $url
            self.info,       # $description
            '',              # $synopsis (code)
            '',              # $details
            'A',             # $type
            '',              # $lang
        ]
        return '\t'.join(self._clean(field) for field in fields)
|
||
|
||
class Parser(object):
    """Parse a gzipped plain-text package listing into ``Package`` objects.

    The file is opened in ``__init__`` and its first ``HEADER_LINES`` lines
    are skipped; ``get_packages`` then consumes the remaining lines. The
    open handle is exposed as ``self.input``; call ``close()`` (or use the
    parser as a context manager) to release it.
    """

    UBUNTU_PKGS_URL = 'http://packages.ubuntu.com'

    # Number of leading lines of the listing that are skipped before parsing.
    HEADER_LINES = 6

    def __init__(self, input='download/allpackages?format=txt.gz'):
        """Open the gzipped listing at path *input* and skip its header."""
        self.input = gzip.open(input, 'rb')
        self.packages = []
        try:
            for _ in range(self.HEADER_LINES):
                self.input.readline()
        except Exception:
            # Don't leak the handle if the file turns out to be unreadable.
            self.input.close()
            raise

    def close(self):
        """Close the underlying gzip file."""
        self.input.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    @staticmethod
    def _parse_line(line):
        """Split one listing line into ``(name, info)``.

        Returns ``None`` for a blank line. *line* may be bytes (as read from
        the gzip stream) or text; bytes are decoded as UTF-8 with undecodable
        bytes dropped, so the same code runs on Python 2 and Python 3.
        """
        if isinstance(line, bytes):
            line = line.decode('utf-8', 'ignore')
        line = line.rstrip('\n')
        if not line.strip():
            return None

        data = line.split(' ')
        name = data[0]

        # The description is taken from token 3 onwards, and when more than
        # one token remains the first of those is dropped as well.
        # NOTE(review): presumably this skips version/section columns of the
        # listing -- confirm against the actual file format.
        info = data[3:]
        if len(info) > 1:
            info = info[1:]
        info = ' '.join(info)

        # fix for agda-bin package; removing non-ascii characters
        info = ''.join(ch for ch in info if ch in string.printable)
        return name, info

    def get_packages(self):
        """Read the remaining lines of the listing into ``self.packages``.

        Every call starts from an empty list and consumes whatever is left
        in ``self.input``. Returns the resulting list of ``Package`` objects.
        """
        self.packages = []
        for line in self.input:
            parsed = self._parse_line(line)
            if parsed is None:
                continue
            name, info = parsed

            reference = self.UBUNTU_PKGS_URL + '/' + name
            self.packages.append(Package(name, info, reference))

            logger.info('Parsed package %s', name)
        return self.packages
|
||
def main():
    """Parse the downloaded package listing and write ``output.txt``.

    One tab-separated row is written per package, UTF-8 encoded.
    """
    # Local import keeps this block self-contained; io.open gives an
    # explicit-encoding text file on both Python 2 and Python 3.
    import io

    parser = Parser()
    try:
        parser.get_packages()
    finally:
        # The parser opens its gzip input in __init__; release it here.
        parser.input.close()

    with io.open('output.txt', 'w', encoding='utf-8') as output:
        for package in parser.packages:
            row = package.__str__()
            # On Python 2 the row may be a byte string; the file wants text.
            if isinstance(row, bytes):
                row = row.decode('utf-8')
            output.write(row + u'\n')
            logger.info('Package added to output: %s', package.name)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#!/bin/bash

# Run parse.py with whichever `python` is first on PATH; the script's exit
# status is that of the interpreter.
exec python parse.py
Empty file.