Skip to content
This repository has been archived by the owner on Dec 24, 2022. It is now read-only.

Commit

Permalink
Completed YouTubeSearchAndWatchHistory link GH-60
Browse files Browse the repository at this point in the history
Also finished the architecture to enable data linking across multiple files.

Signed-off-by: Brian Evans <ebrian101@gmail.com>
  • Loading branch information
mrbrianevans committed Mar 26, 2022
1 parent c317122 commit 57b0bf5
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 39 deletions.
11 changes: 6 additions & 5 deletions client/src/components/FileUploader.svelte
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
<script lang='ts'>
import FilePond, { registerPlugin } from 'svelte-filepond'
import FilePondPluginImagePreview from 'filepond-plugin-image-preview'
import ProcessingWorker from '../workers/processingWorker?worker'
import type { PostProcessedOutput } from '../../../lib/typedefs/PostProcess'
import { onlyFilename } from '../../../lib/common/PathProcessing'
import { isMedia } from '../../../lib/common/isMedia'
import type { ProcessedFile } from '../../../lib/processFileContent'
import { processingWorkerWrapper } from '../workers/processingWorkerWrapper'
import { LoadingFile } from '../types/FileItem'
import type { LoadingFile } from '../types/FileItem'
import { linkDataSource } from '../../../lib/linking/LinkedDataSources'
// Register the plugins
// registerPlugin(FilePondPluginImagePreview)
Expand All @@ -16,7 +15,7 @@
// the name to use for the internal file input
let name = 'filepond'
export let files: (PostProcessedOutput | LoadingFile)[] = []
export let files: (ProcessedFile | LoadingFile)[] = []
async function handleAddFile(err, fileItem) {
// console.log('Relative path', fileItem.relativePath)
Expand All @@ -32,6 +31,8 @@
const workerOutput = await processingWorkerWrapper(fileItem.file)
// console.log('metadata', workerOutput.metadata)
files[files.findIndex(f => f === loadingFile)] = workerOutput
const links = linkDataSource(workerOutput, files.filter(file => !file.loading) as ProcessedFile[])
files = [...files, ...links]
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
<script>
<script lang='ts'>
import { YoutubeWatchSearchHistory } from '../../../../lib/linking/processors/youtubeWatchSearchHistory'
import JsonEditor from '../../components/JsonEditor.svelte'
import { minuteDifference } from '../../../../lib/common/DateUtils'
export let data: YoutubeWatchSearchHistory
</script>

<div>
<JsonEditor data={data} />
{#each data as { search, video }}
<p>You searched <b>{search.searchTerm}</b> on {search.date} and then watched <b>{video.video}</b> by {video.channel}
within five minutes. ({minuteDifference(video.date, search.date).toFixed(0)} minutes later)</p>
{/each}
</div>
6 changes: 3 additions & 3 deletions client/src/workers/processingWorkerWrapper.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import type { PostProcessedOutput } from '../../../lib/typedefs/PostProcess'
import ProcessingWorker from './processingWorker?worker'
import type { ProcessedFile } from '../../../lib/processFileContent'

export async function processingWorkerWrapper(
input: File
): Promise<PostProcessedOutput> {
): Promise<ProcessedFile> {
const processingWorker: Worker = new ProcessingWorker()
const workerOutput = await new Promise<PostProcessedOutput>((res) => {
const workerOutput = await new Promise<ProcessedFile>((res) => {
processingWorker.addEventListener('message', (e) => res(e.data), {
once: true
})
Expand Down
28 changes: 28 additions & 0 deletions lib/common/DateUtils.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
/**
 * Converts a loosely-typed date value into a new `Date` instance.
 *
 * Numbers and existing `Date` objects are handed straight to the `Date`
 * constructor. Strings have their first occurrence of `BST` swapped for
 * `(British Summer Time)` before being handed to the `Date` constructor.
 *
 * @param date - a `Date`, a number or a string
 * @returns a freshly constructed `Date` (never the instance passed in)
 * @throws Error when `date` is not a `Date`, a number or a string
 */
export function parseDate(date: any) {
  switch (typeof date) {
    case 'number':
      return new Date(date)
    case 'string': {
      // NOTE(review): presumably the bare "BST" abbreviation is not understood by the Date parser — confirm
      const withLongZoneName = date.replace('BST', '(British Summer Time)')
      return new Date(withLongZoneName)
    }
  }
  if (date instanceof Date) return new Date(date)
  throw new Error('Cannot parse date of type ' + typeof date + ' : ' + date)
}
export type DateFormatter = (date: Date) => string
// eg 2014-06-03T22:01:26
export const formatDateEurTime: DateFormatter = (date) => {
Expand Down Expand Up @@ -37,3 +44,24 @@ export const longDate: DateFormatter = (date) => {
dateStyle: 'long'
}).format(date)
}

/**
 * Computes how many milliseconds `d1` lies after `d2`.
 *
 * Both arguments are passed through `parseDate` before their timestamps are
 * compared. The result is negative when `d1` is the earlier of the two.
 */
export function millisecondDifference(d1: Date, d2: Date) {
  const firstTimestamp = parseDate(d1).getTime()
  const secondTimestamp = parseDate(d2).getTime()
  return firstTimestamp - secondTimestamp
}

/**
 * Computes how many seconds `d1` lies after `d2`.
 *
 * This is the result of `millisecondDifference` divided by 1000, so it may be
 * fractional and is negative when `d1` is the earlier of the two.
 */
export function secondDifference(d1: Date, d2: Date) {
  const millisecondsPerSecond = 1000
  const elapsedMilliseconds = millisecondDifference(d1, d2)
  return elapsedMilliseconds / millisecondsPerSecond
}

/**
 * Computes how many minutes `d1` lies after `d2`.
 *
 * This is the result of `secondDifference` divided by 60, so it may be
 * fractional and is negative when `d1` is the earlier of the two.
 */
export function minuteDifference(d1: Date, d2: Date) {
  const secondsPerMinute = 60
  const elapsedSeconds = secondDifference(d1, d2)
  return elapsedSeconds / secondsPerMinute
}
4 changes: 2 additions & 2 deletions lib/common/TimeSeriesAnalysis.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { DateFormatter, formatDateEur } from './DateUtils'
import { DateFormatter, formatDateEur, parseDate } from './DateUtils'

interface TimeSeriesValue {
timestamp: Date
Expand Down Expand Up @@ -46,7 +46,7 @@ export class TimeSeries {
timestamp = new Date(dataPoint)
value = defaultValue
} else if (typeof dataPoint === 'string') {
timestamp = new Date(dataPoint.replace('BST', '(British Summer Time)'))
timestamp = parseDate(dataPoint)
value = defaultValue
} else if (typeof dataPoint === 'object') {
timestamp = new Date(dataPoint.timestamp)
Expand Down
59 changes: 52 additions & 7 deletions lib/linking/LinkedDataSources.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,75 @@
import { PostProcessedOutput, PostProcessor } from '../typedefs/PostProcess'
import { YoutubeWatchSearchHistoryLinkedProcessor } from './processors/youtubeWatchSearchHistory'
import { ComponentName } from '../typedefs/Components'
import type { ComponentName } from '../typedefs/Components'
import type { ProcessedFile } from '../processFileContent'
import type { PostProcessorCategory } from '../postProcessing/postProcessingCategoriser'

// every time a new file is added, check if there is the right combination for this list

export type LinkedProcessor = (
/**
* an array of the ProcessedFiles of the matching data sources
*/
inputs: PostProcessedOutput[]
) => PostProcessedOutput | null
inputs: ProcessedFile[]
) => ProcessedFile | null

interface LinkedDataSource {
linker: LinkedProcessor
requiredPostprocessorCodes: string[]
requiredPostprocessorCodes: PostProcessorCategory[]
component: ComponentName
}

export const linkedDataSources: LinkedDataSource[] = [
// the necessary postprocessed data sources, the linker (which is a post processor itself)
{
requiredPostprocessorCodes: [
'youtube-watch-history',
'youtube-search-history'
'YouTubeWatchHistoryPostProcess',
'YouTubeSearchHistoryPostProcess'
],
linker: YoutubeWatchSearchHistoryLinkedProcessor,
component: 'YouTubeSearchAndWatchHistory'
}
]

/**
 * Runs every linker in `linkedDataSources` whose requirements are satisfied
 * now that `dataSource` is available.
 *
 * A linker is only considered when the post-processing category of
 * `dataSource` is one of its required categories, and it is only run when
 * every required category is covered by `dataSource` or by an entry of
 * `dataSources`.
 *
 * @param dataSource - the newly processed file that may complete a link
 * @param dataSources - the processed files to link against; may already
 *   contain `dataSource`
 * @returns the outputs of the linkers that ran; linkers that returned `null`
 *   contribute nothing
 */
export function linkDataSource(
  dataSource: ProcessedFile,
  dataSources: ProcessedFile[]
): ProcessedFile[] {
  console.log('Finding links between datasources')
  // The new data source goes last and appears exactly once, even when the
  // caller's list already holds it.
  const candidates = dataSources
    .filter((d) => d !== dataSource)
    .concat(dataSource)
  const availableCategories = new Set(
    candidates.map((d) => d.metadata.postProcessingCategory)
  )
  const links: ProcessedFile[] = []
  for (const linkedDataSource of linkedDataSources) {
    console.log('testing linker:', linkedDataSource.component)
    const required = linkedDataSource.requiredPostprocessorCodes
    // postProcessingCategory is optional, so a missing category never matches
    const isRequired = (file: ProcessedFile) => {
      const category = file.metadata.postProcessingCategory
      return category !== undefined && required.includes(category)
    }
    // requirements don't include the new data source
    if (!isRequired(dataSource)) continue
    // at least one requirement is not met
    if (!required.every((r) => availableCategories.has(r))) continue
    // requirements are met
    const link = linkedDataSource.linker(candidates.filter(isRequired))
    // a linker may decline to produce output; never hand null to the caller
    if (link != null) links.push(link)
  }
  return links
}
20 changes: 12 additions & 8 deletions lib/linking/processors/youtubeWatchSearchHistory.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { LinkedProcessor } from '../LinkedDataSources'
import { YouTubeSearchHistory } from '../../vendors/google/youtube/YouTubeSearchHistory'
import { YouTubeWatchHistory } from '../../vendors/google/youtube/YouTubeWatchHistory'
import { parseDate } from '../../common/DateUtils'

export type YoutubeWatchSearchHistory = {
search: YouTubeSearchHistory[number]
Expand All @@ -14,36 +15,39 @@ export const YoutubeWatchSearchHistoryLinkedProcessor: LinkedProcessor = (
inputs
) => {
const searchHistoryInput = inputs.find(
(input) => input.metadata.filename === 'search-history.html'
(input) =>
input.metadata.postProcessingCategory ===
'YouTubeSearchHistoryPostProcess'
)
const watchHistoryInput = inputs.find(
(input) => input.metadata.filename === 'search-history.html'
(input) =>
input.metadata.postProcessingCategory === 'YouTubeWatchHistoryPostProcess'
)
if (!searchHistoryInput || !watchHistoryInput) return null

const searches = searchHistoryInput.data as YouTubeSearchHistory
const watched = watchHistoryInput.data as YouTubeWatchHistory
watched.sort(
(a, b) => new Date(a.date).getTime() - new Date(b.date).getTime()
(a, b) => parseDate(a.date).getTime() - parseDate(b.date).getTime()
)
// for each search, find the first video watched afterwards
const searchedVideos: YoutubeWatchSearchHistory = []
for (const search of searches) {
const searchDate = new Date(search.date)
const searchDate = parseDate(search.date)
const firstVideo = watched.find(
(video) => new Date(video.date) > searchDate
(video) => parseDate(video.date) > searchDate
)
const minThreshold = 1000 * 60 * 5 // video must be watched within 5 minutes of search
if (
firstVideo &&
new Date(firstVideo.date).getTime() - searchDate.getTime() < minThreshold
parseDate(firstVideo.date).getTime() - searchDate.getTime() < minThreshold
)
searchedVideos.push({ search, video: firstVideo })
}

return {
metadata: {},
data: searchedVideos,
title: 'Linked Youtube Data'
title: 'Linked Youtube Data',
component: 'YouTubeSearchAndWatchHistory'
}
}
2 changes: 1 addition & 1 deletion lib/postProcessing/postProcessingCategoriser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import {
} from './postProcessors/youtube'
import { DefaultHtmlPostProcess } from './postProcessors/genericFallbacks/html'

type PostProcessorCategory =
export type PostProcessorCategory =
| keyof typeof postProcessors
| keyof typeof genericPostProcessors
// order is important. the first one in this array that matches is used
Expand Down
23 changes: 15 additions & 8 deletions lib/processFileContent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,34 @@ import {
postProcessingCategoriser
} from './postProcessing/postProcessingCategoriser'
import { PostProcessedOutput } from './typedefs/PostProcess'
import { ComponentName } from './typedefs/Components'

/**
 * A post-processed output combined with the name of a component.
 * This is the type returned by `processFileContent`.
 */
export type ProcessedFile = PostProcessedOutput & {
// NOTE(review): presumably the UI component used to display this output — confirm against consumers
component: ComponentName
}

export const processFileContent: (input: {
fileContent: string
filename: string
fileType: string
}) => PostProcessedOutput = ({ fileContent, filename, fileType }) => {
}) => ProcessedFile = ({ fileContent, filename, fileType }) => {
const preProcessingCategory = preProcessingCategoriser({ filename, fileType })
const preProcessor = preProcessorMap[preProcessingCategory]
const preProcessedOutput = preProcessor({ filename, fileType, fileContent })
Object.assign(preProcessedOutput.metadata, {
filename,
fileType,
fileContent
fileContent,
preProcessingCategory
})
const postCategory = postProcessingCategoriser({
const postProcessingCategory = postProcessingCategoriser({
filename,
fileType,
preProcessedOutput,
preProcessingCategory
})
console.log('Using', postCategory, 'for postprocessing', filename)
const postProcess = getPostProcessByCode(postCategory)
console.log('Using', postProcessingCategory, 'for postprocessing', filename)
const postProcess = getPostProcessByCode(postProcessingCategory)
const postProcessor =
postProcess?.postProcessingFunction ??
((i) => ({ ...i.preProcessedOutput }))
Expand All @@ -39,12 +45,13 @@ export const processFileContent: (input: {
Object.assign(postProcessedOutput.metadata, {
filename,
fileType,
fileContent
fileContent,
preProcessingCategory,
postProcessingCategory
})
const output = {
return {
...postProcessedOutput,
title: postProcess.name ?? filename,
component: postProcess.component
}
return output
}
7 changes: 5 additions & 2 deletions lib/typedefs/PreProcess.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { TimeSeriesMetadata } from '../common/TimeSeriesAnalysis'
import { FrequencyTables } from '../common/FrequencyAnalysis'
import type { TimeSeriesMetadata } from '../common/TimeSeriesAnalysis'
import type { FrequencyTables } from '../common/FrequencyAnalysis'
import type { GetTopicsOptions } from 'fast-topics'
import type { PostProcessorCategory } from '../postProcessing/postProcessingCategoriser'

export type PreProcessingCategory =
| 'json'
Expand Down Expand Up @@ -35,6 +36,8 @@ export interface WellKnownMetadata extends KeyValueObject {
fileContent?: string
filename?: string
fileType?: string
preProcessingCategory?: PreProcessingCategory
postProcessingCategory?: PostProcessorCategory
}

export type PreProcessedOutput<
Expand Down

0 comments on commit 57b0bf5

Please sign in to comment.