# [ Chapter 4 - Crowdsourced Relevance ] 
# Setting up the Retrotech Dataset

In [None]:
import sys
sys.path.append('..')
from aips import *
import os
from IPython.display import display,HTML
from pyspark.sql import SparkSession
# Obtain the Spark session for this notebook (getOrCreate reuses an active one if present).
spark = (
    SparkSession.builder
    .appName("aips-ch4-getting-started-retrotech")
    .getOrCreate()
)

## Download the Retrotech (Ecommerce) Products + Signals Dataset

In [None]:
#Get datasets
![ ! -d 'retrotech' ] && git clone --depth 1 https://github.com/ai-powered-search/retrotech.git
! cd retrotech && git pull
! cd retrotech && tar -xvf products.tgz -C '../../data/retrotech/' && tar -xvf signals.tgz -C '../../data/retrotech/'


## Get a Feel for the Product Catalog

### Listing 4.1

In [None]:
# Preview the first lines of the product catalog CSV (`head` prints 10 lines by default).
! cd ../data/retrotech/ && head products.csv

## Index the Products into the Search Engine

### Listing 4.2

In [None]:
# Get the search engine client, create the "products" collection,
# and load the product catalog CSV into it via Spark.
engine = get_engine()
products_collection = "products"
engine.create_collection(products_collection)
engine.populate_collection_from_csv(
    spark,
    products_collection,
    "../data/retrotech/products.csv",
)

## Verify Searches Work

### Listing 4.3

In [None]:
def products_request(query):
    """Build the search request used to query the products collection.

    Args:
        query: The query string, passed through unchanged as the request's "query".

    Returns:
        A dict with the query, the fields to return (upc, name, manufacturer,
        score), a limit of 5, and engine params selecting the edismax parser
        over the name, manufacturer and longDescription fields, sorted by
        score descending and then upc ascending.
    """
    returned_fields = ["upc", "name", "manufacturer", "score"]
    engine_params = dict(
        qf="name manufacturer longDescription",
        defType="edismax",
        indent="true",
        sort="score desc, upc asc",
    )
    return dict(query=query, fields=returned_fields, limit=5, params=engine_params)
  
# Run a sample keyword search against the products collection
# and render the returned documents as HTML in the notebook.
query, collection = "ipod", "products"
request = products_request(query)
response = engine.search(collection, request)

display(
    HTML(
        render_search_results(query, engine.docs_as_html(response))
    )
)

## Get a Feel for the Signals Data

In [None]:
# Preview the first lines of the signals CSV (`head` prints 10 lines by default).
! cd ../data/retrotech && head signals.csv

## Index the Signals into the Search Engine

### Listing 4.4

In [None]:
# Create the "signals" collection and load the signals CSV into it via Spark.
signals_collection = "signals"
engine.create_collection(signals_collection)
engine.populate_collection_from_csv(
    spark,
    signals_collection,
    "../data/retrotech/signals.csv",
)

## Success!

You have now indexed the Retrotech product catalog and signals into the search engine, and run a sample query against the product collection. The results don't look very relevant using the out-of-the-box keyword scoring function, of course, but we'll be working to improve that throughout the rest of this book!

In the next section, we'll take a look at our first crowdsourced AI-powered search technique: [Signals Boosting](2.signals-boosting.ipynb).