# Example 1: Training the Pseudoscientific Videos Detection Classifier

In [1]:
import os

# Switch the working directory to the parent of the directory the kernel
# started in (presumably the project root that holds the `dataset` and
# `classifier` packages imported below -- confirm against the repo layout).
#
# The target is resolved to an absolute path only on the first execution and
# remembered in PROJECT_ROOT, so re-running this cell in the same kernel is a
# no-op instead of climbing one more directory level each time, which is what
# a bare relative os.chdir('../') does.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath(os.pardir)
os.chdir(PROJECT_ROOT)

from dataset.DatasetUtils import DatasetUtils
from classifier.featureengineering.FeatureEngineeringModels import FeatureEngineeringModels
from classifier.training.ClassifierTraining import ClassifierTraining

# Create the objects shared by the rest of the notebook.
# `dataset` is built first because it is handed to FeatureEngineeringModels here
# and to ClassifierTraining in Step 2.
dataset = DatasetUtils()
# Used by every Step 1 cell to prepare fastText data and fine-tune the models.
featureEngineeringModels = FeatureEngineeringModels(dataset_object=dataset)



## Step 1. Fine-tune separate fastText models for each Video Metadata Type

In this step, we fine-tune four separate fastText models, one for each video metadata type.
These models are used during the training of the Deep Learning model to generate embeddings for the corresponding metadata type.
This step only needs to be run once.

### 1.1. Video Snippet

In [None]:
# Generate the fastText input features for the Video Snippet metadata.
# NOTE(review): presumably finetune_model consumes what this call produces,
# so keep the two calls in this order -- confirm in FeatureEngineeringModels.
featureEngineeringModels.prepare_fasttext_data(model_type='video_snippet')

# Fine-tune the fastText model dedicated to the Video Snippet metadata.
featureEngineeringModels.finetune_model(model_type='video_snippet')

### 1.2. Video Tags

In [None]:
# Generate the fastText input features for the Video Tags metadata.
# NOTE(review): presumably finetune_model consumes what this call produces,
# so keep the two calls in this order -- confirm in FeatureEngineeringModels.
featureEngineeringModels.prepare_fasttext_data(model_type='video_tags')

# Fine-tune the fastText model dedicated to the Video Tags metadata.
featureEngineeringModels.finetune_model(model_type='video_tags')

### 1.3. Video Transcript

In [None]:
# Generate the fastText input features for the Video Transcript metadata.
# NOTE(review): presumably finetune_model consumes what this call produces,
# so keep the two calls in this order -- confirm in FeatureEngineeringModels.
featureEngineeringModels.prepare_fasttext_data(model_type='video_transcript')

# Fine-tune the fastText model dedicated to the Video Transcript metadata.
featureEngineeringModels.finetune_model(model_type='video_transcript')

### 1.4. Video Comments

In [None]:
# Generate the fastText input features for the Video Comments metadata.
# NOTE(review): presumably finetune_model consumes what this call produces,
# so keep the two calls in this order -- confirm in FeatureEngineeringModels.
featureEngineeringModels.prepare_fasttext_data(model_type='video_comments')

# Fine-tune the fastText model dedicated to the Video Comments metadata.
featureEngineeringModels.finetune_model(model_type='video_comments')

## Step 2. Train the Pseudoscience Deep Learning Model

In this step, we train and validate the Pseudoscientific Content Detection Deep Learning model using 10-fold cross-validation.
At the end of training, the best model is stored in `pseudoscientificvideosdetection/models/pseudoscience_model_final.hdf5`.

### 2.1. Create a Classifier Training Object

Creating this object automatically builds the deep learning model; its summary is printed below.

In [2]:
# Create the ClassifierTraining object on top of the shared `dataset` object.
# Per the recorded output of this cell, construction also prints the model summary.
classifierTrainingObject = ClassifierTraining(dataset_object=dataset)



Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
video_comments_input (InputLaye [(None, 300)]        0                                            
__________________________________________________________________________________________________
video_snippet_input (InputLayer [(None, 300)]        0                                            
__________________________________________________________________________________________________
video_transcript_input (InputLa [(None, 300)]        0                                            
__________________________________________________________________________________________________
flatten (Flatten)               (None, 300)          0           video_comments_input[0][0]       
______________________________________________________________________________________________

### 2.2. Train the Classifier

In [4]:
# Run the training and validation procedure on the object created in 2.1.
# NOTE(review): the output recorded for this cell ends in
#   ValueError: Layer model expects 3 input(s), but it received 4 input tensors
# i.e. the model built in 2.1 and the inputs fed by train_model disagree.
# The cause is not visible in this notebook -- check the model definition and
# the training input list in the classifier package, then re-run and re-save.
classifierTrainingObject.train_model()

/n---Training the Model with 1971 videos.

--- [K-FOLD 1/10] TRAIN: 1773, TEST: 198
TOTAL VIDEOS: 1773 | TRAIN: 631, TEST: 159
--- Oversampling Train set...
--- [AFTER OVER-SAMPLING] TRAIN: 918, VAL: 159, TEST: 198

--- Classifier Training started...
3
[array([[-0.00154017,  0.00425594, -0.01610176, ...,  0.0909829 ,
         0.00650647, -0.00792344],
       [-0.02839425,  0.00769057, -0.00898624, ...,  0.08284736,
         0.00765291, -0.00735889],
       [ 0.00877337,  0.0220746 , -0.02116582, ...,  0.07865869,
         0.01977451, -0.0105785 ],
       ...,
       [ 0.00041521,  0.00509564, -0.02090871, ...,  0.08547921,
         0.00670899, -0.01469666],
       [-0.00853482,  0.00643449, -0.02708934, ...,  0.0946865 ,
         0.01245062, -0.00099109],
       [-0.00478722,  0.01052219, -0.01604749, ...,  0.09192502,
         0.00564561, -0.00994288]], dtype=float32), array([[-0.00168346,  0.00280175, -0.01627938, ...,  0.09156334,
         0.01212652, -0.00420917],
       [-0.006592

ValueError: in user code:

    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/keras/engine/training.py:1330 test_function  *
        return step_function(self, iterator)
    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/keras/engine/training.py:1320 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/keras/engine/training.py:1313 run_step  **
        outputs = model.test_step(data)
    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/keras/engine/training.py:1267 test_step
        y_pred = self(x, training=False)
    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/keras/engine/base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /Users/matus/opt/miniconda3/lib/python3.8/site-packages/keras/engine/input_spec.py:199 assert_input_compatibility
        raise ValueError('Layer ' + layer_name + ' expects ' +

    ValueError: Layer model expects 3 input(s), but it received 4 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 300) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(None, 300) dtype=float32>, <tf.Tensor 'IteratorGetNext:2' shape=(None, 300) dtype=float32>, <tf.Tensor 'IteratorGetNext:3' shape=(None, 3) dtype=float32>]
