# Flickr data download

In [24]:
import csv
import datetime
import itertools
import os

import flickrapi

We use the Flickr API to request Flickr data. To use it, you need an API key and an API secret, which grant permission to call the API. Register a Flickr account and apply for them first.
Here is the [link](https://www.flickr.com/services/api/misc.api_keys.html). 

In [3]:
# Read the credentials from the environment variables FLICKR_API_KEY and
# FLICKR_API_SECRET so that real keys do not have to be saved in the notebook.
# When a variable is not set, the placeholder string is used instead.
api_key = os.environ.get('FLICKR_API_KEY', 'xxxxxxxxxxxxxx')
api_secret = os.environ.get('FLICKR_API_SECRET', 'xxxxxxxxxxxxxxxxx')
# Client object used for every Flickr API call below.
flickr = flickrapi.FlickrAPI(api_key, api_secret, cache=True)

The `flickr.photos.search` function returns a list of photos matching some criteria. It lets you search for photos by location, date, tag, and so on.

In [245]:
# Photos are searched inside a geographic bounding box.
# Fill in the start and end timestamps of the time span you want to download.
begin_year = datetime.datetime(year, month, date, hour, minute, second)
end_year = datetime.datetime(year, month, date, hour, minute, second)
# Length of one day, as a timedelta.
one_day = datetime.timedelta(days=1)
# Number of whole days covered by the time span.
range_days = (end_year - begin_year).days
# Unlike standard photo queries, geo (bounding-box) queries return at most
# 250 results per page, so request that maximum page size.
photos = flickr.photos.search(
    bbox='xxxxx',
    extras='geo,tags,date_taken,url_m',
    per_page=250,
)
# When geo information is requested, the Flickr API only allows the first
# 2000 photos to be queried, so the page count is capped at 8 pages.
pages = min(int(photos[0].attrib['pages']), 8)

In [246]:
# Column names written as the first row of the csv file.
header = ['user', 'uid', 'lat', 'lon', 'title', 'tag', 'date_taken', 'url']

# Create (or overwrite) the csv file that stores the data.
# The `with` block closes the file even if a request fails part-way through;
# newline='' lets the csv module control the line endings itself.
with open('xxxx.csv', 'w', encoding='utf-8', newline='') as f:
    # csv.writer quotes any field containing a comma, a quote or a line break
    # (titles and tags are free text), so every row keeps exactly eight columns.
    writer = csv.writer(f)
    writer.writerow(header)

    # Download the photos day by day, because the number of photos taken in a
    # single day generally stays below the maximum the Flickr API allows per query.
    for day in range(range_days):
        next_day = begin_year + datetime.timedelta(day)
        for page in range(pages):
            photos = flickr.photos.search(bbox='xxxxx', content_type=1,
                                          min_taken_date=next_day,
                                          max_taken_date=next_day + one_day,
                                          extras='geo,tags,date_taken,url_m',
                                          per_page=250, page=page + 1)
            for photo in photos[0]:
                fields = [
                    photo.get('owner'),
                    photo.get('id'),
                    photo.get('latitude'),
                    photo.get('longitude'),
                    photo.get('title'),
                    photo.get('tags'),
                    photo.get('datetaken'),
                    photo.get('url_m'),
                ]
                # Missing attributes are stored as the literal text 'Null'.
                writer.writerow(['Null' if value is None else value for value in fields])

            # Stop paging once this day's own last page has been fetched, instead of
            # always requesting the global page cap. If the response carries no
            # 'pages' attribute, fall back to the cap and keep requesting every page.
            if page + 1 >= int(photos[0].get('pages', pages)):
                break