<a href="https://research.google.com/youtube8m/download.html">
<img src="https://1.bp.blogspot.com/-6ep4JQfWHN0/XRUznUhDQNI/AAAAAAAAERs/gy2ztDJto50eJ3XvmIlzD6Uxa4Z0ePd9wCLcBGAs/s1600/image1.png" alt="YouTube-8M dataset"/>
</a>

---

# Frame-level features dataset

Frame-level features are stored as [tensorflow.SequenceExample](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto) [protocol buffers](https://developers.google.com/protocol-buffers/).

## SequenceExample Proto

```protobuf
message SequenceExample {
  Features context = 1;
  FeatureLists feature_lists = 2;
};
```

### Features Proto

```protobuf
message Features {
  map<string, Feature> feature = 1;
};

message Feature {
  oneof kind {
    BytesList bytes_list = 1;
    FloatList float_list = 2;
    Int64List int64_list = 3;
  }
};

message BytesList {
  repeated bytes value = 1;
}
message FloatList {
  repeated float value = 1 [packed = true];
}
message Int64List {
  repeated int64 value = 1 [packed = true];
}
```

### FeatureLists Proto

```protobuf
message FeatureLists {
  map<string, FeatureList> feature_list = 1;
};

message FeatureList {
  repeated Feature feature = 1;
};
```

## Ex:

```ruby
context: {
  feature: {
    key  : "id"
    value: {
      bytes_list: {
        value: (Video id)
      }
    }
  }
  feature: {
    key  : "labels"
    value: {
      int64_list: {
        value: [1, 522, 11, 172]  # label list
      }
    }
  }
}

feature_lists: {
  feature_list: {
    key  : "rgb"
    value: {
      feature: {
        bytes_list: {
          value: [1024 8bit quantized features]
        }
      }
      feature: {
        bytes_list: {
          value: [1024 8bit quantized features]
        }
      }
      ... # Repeated for every second, up to 300
    }
  }
  feature_list: {
    key  : "audio"
    value: {
      feature: {
        bytes_list: {
          value: [128 8bit quantized features]
        }
      }
      feature: {
        bytes_list: {
          value: [128 8bit quantized features]
        }
      }
      ... # Repeated for every second, up to 300
    }
  }
}
```

# Video-level features dataset

Video-level features are stored as [tensorflow.Example](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto) [protocol buffers](https://developers.google.com/protocol-buffers/).

## Example Proto

```protobuf
message Example {
  Features features = 1;
};
```

## Ex:

```ruby
features: {
  feature: {
    key  : "id"
    value: {
      bytes_list: {
        value: (Video id)
      }
    }
  }
  feature: {
    key  : "labels"
    value: {
      int64_list: {
        value: [1, 522, 11, 172]  # label list
      }
    }
  }
  feature: {
    # Average of all 'rgb' features for the video
    key  : "mean_rgb"
    value: {
      float_list: {
        value: [1024 float features]
      }
    }
  }
  feature: {
    # Average of all 'audio' features for the video
    key  : "mean_audio"
    value: {
      float_list: {
        value: [128 float features]
      }
    }
  }
}
```

# Segment-rated frame-level features dataset (NEW)

Only frame-level features are available for the YouTube-8M Segment dataset. Each example contains the labels and features of a video in [tensorflow.SequenceExample](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto) format. The labels in the segment dataset use the same label mapping as the YouTube-8M video-level dataset. The features field has the same format as in the previous version of the YouTube-8M frame-level features dataset.

## Ex:

```ruby
context: {
  feature: {
    key  : "id"
    value: {
      bytes_list: {
        value: (Video id)
      }
    }
  }
  feature: {
    key  : "labels" # video-level labels.
    value: {
      int64_list: {
        value: [ 441, 525 ]
      }
    }
  }
  feature: {
    key: "segment_start_times"
    value: {
      int64_list: {
        value: [ 40, 30, 50, 65, 90 ]
      }
    }
  }
  feature: {
    key: "segment_end_times"
    value: {
      int64_list: {
        value: [ 45, 35, 55, 70, 95 ]
      }
    }
  }
  feature: {
    key: "segment_labels"
    value: {
      int64_list: {
        value: [ 525, 525, 525, 525, 525 ]
      }
    }
  }
  feature: {
    key: "segment_scores"
    value: {
      float_list: {
        value: [ 0.0, 0.0, 0.0, 0.0, 1.0 ]
      }
    }
  }
}
feature_lists: {
  # See the frame-level features section.
}
```